15796c8dcSSimon Schubert /* Extended regular expression matching and search library,
25796c8dcSSimon Schubert version 0.12.
35796c8dcSSimon Schubert (Implements POSIX draft P1003.2/D11.2, except for some of the
45796c8dcSSimon Schubert internationalization features.)
55796c8dcSSimon Schubert
65796c8dcSSimon Schubert Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
7*ef5ccd6cSJohn Marino 2002, 2005, 2010, 2013 Free Software Foundation, Inc.
85796c8dcSSimon Schubert This file is part of the GNU C Library.
95796c8dcSSimon Schubert
105796c8dcSSimon Schubert The GNU C Library is free software; you can redistribute it and/or
115796c8dcSSimon Schubert modify it under the terms of the GNU Lesser General Public
125796c8dcSSimon Schubert License as published by the Free Software Foundation; either
135796c8dcSSimon Schubert version 2.1 of the License, or (at your option) any later version.
145796c8dcSSimon Schubert
155796c8dcSSimon Schubert The GNU C Library is distributed in the hope that it will be useful,
165796c8dcSSimon Schubert but WITHOUT ANY WARRANTY; without even the implied warranty of
175796c8dcSSimon Schubert MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
185796c8dcSSimon Schubert Lesser General Public License for more details.
195796c8dcSSimon Schubert
205796c8dcSSimon Schubert You should have received a copy of the GNU Lesser General Public
215796c8dcSSimon Schubert License along with the GNU C Library; if not, write to the Free
225796c8dcSSimon Schubert Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
235796c8dcSSimon Schubert 02110-1301 USA. */
245796c8dcSSimon Schubert
/* This file has been modified for usage in libiberty.  It includes "xregex.h"
   instead of <regex.h>.  The "xregex.h" header file renames all external
   routines with an "x" prefix so they do not collide with the native regex
   routines or with other components' regex routines.  */
295796c8dcSSimon Schubert /* AIX requires this to be the first thing in the file. */
305796c8dcSSimon Schubert #if defined _AIX && !defined __GNUC__ && !defined REGEX_MALLOC
315796c8dcSSimon Schubert #pragma alloca
325796c8dcSSimon Schubert #endif
335796c8dcSSimon Schubert
345796c8dcSSimon Schubert #undef _GNU_SOURCE
355796c8dcSSimon Schubert #define _GNU_SOURCE
365796c8dcSSimon Schubert
375796c8dcSSimon Schubert #ifndef INSIDE_RECURSION
385796c8dcSSimon Schubert # ifdef HAVE_CONFIG_H
395796c8dcSSimon Schubert # include <config.h>
405796c8dcSSimon Schubert # endif
415796c8dcSSimon Schubert #endif
425796c8dcSSimon Schubert
435796c8dcSSimon Schubert #include <ansidecl.h>
445796c8dcSSimon Schubert
455796c8dcSSimon Schubert #ifndef INSIDE_RECURSION
465796c8dcSSimon Schubert
475796c8dcSSimon Schubert # if defined STDC_HEADERS && !defined emacs
485796c8dcSSimon Schubert # include <stddef.h>
49*ef5ccd6cSJohn Marino # define PTR_INT_TYPE ptrdiff_t
505796c8dcSSimon Schubert # else
515796c8dcSSimon Schubert /* We need this for `regex.h', and perhaps for the Emacs include files. */
525796c8dcSSimon Schubert # include <sys/types.h>
53*ef5ccd6cSJohn Marino # define PTR_INT_TYPE long
545796c8dcSSimon Schubert # endif
555796c8dcSSimon Schubert
565796c8dcSSimon Schubert # define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
575796c8dcSSimon Schubert
/* For platforms which support the ISO C Amendment 1 functionality we
   support user-defined character classes.  */
605796c8dcSSimon Schubert # if defined _LIBC || WIDE_CHAR_SUPPORT
615796c8dcSSimon Schubert /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
625796c8dcSSimon Schubert # include <wchar.h>
635796c8dcSSimon Schubert # include <wctype.h>
645796c8dcSSimon Schubert # endif
655796c8dcSSimon Schubert
665796c8dcSSimon Schubert # ifdef _LIBC
675796c8dcSSimon Schubert /* We have to keep the namespace clean. */
685796c8dcSSimon Schubert # define regfree(preg) __regfree (preg)
695796c8dcSSimon Schubert # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
705796c8dcSSimon Schubert # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
715796c8dcSSimon Schubert # define regerror(errcode, preg, errbuf, errbuf_size) \
725796c8dcSSimon Schubert __regerror(errcode, preg, errbuf, errbuf_size)
735796c8dcSSimon Schubert # define re_set_registers(bu, re, nu, st, en) \
745796c8dcSSimon Schubert __re_set_registers (bu, re, nu, st, en)
755796c8dcSSimon Schubert # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
765796c8dcSSimon Schubert __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
775796c8dcSSimon Schubert # define re_match(bufp, string, size, pos, regs) \
785796c8dcSSimon Schubert __re_match (bufp, string, size, pos, regs)
795796c8dcSSimon Schubert # define re_search(bufp, string, size, startpos, range, regs) \
805796c8dcSSimon Schubert __re_search (bufp, string, size, startpos, range, regs)
815796c8dcSSimon Schubert # define re_compile_pattern(pattern, length, bufp) \
825796c8dcSSimon Schubert __re_compile_pattern (pattern, length, bufp)
835796c8dcSSimon Schubert # define re_set_syntax(syntax) __re_set_syntax (syntax)
845796c8dcSSimon Schubert # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
855796c8dcSSimon Schubert __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
865796c8dcSSimon Schubert # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
875796c8dcSSimon Schubert
885796c8dcSSimon Schubert # define btowc __btowc
895796c8dcSSimon Schubert
905796c8dcSSimon Schubert /* We are also using some library internals. */
915796c8dcSSimon Schubert # include <locale/localeinfo.h>
925796c8dcSSimon Schubert # include <locale/elem-hash.h>
935796c8dcSSimon Schubert # include <langinfo.h>
945796c8dcSSimon Schubert # include <locale/coll-lookup.h>
955796c8dcSSimon Schubert # endif
965796c8dcSSimon Schubert
975796c8dcSSimon Schubert /* This is for other GNU distributions with internationalized messages. */
985796c8dcSSimon Schubert # if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
995796c8dcSSimon Schubert # include <libintl.h>
1005796c8dcSSimon Schubert # ifdef _LIBC
1015796c8dcSSimon Schubert # undef gettext
1025796c8dcSSimon Schubert # define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
1035796c8dcSSimon Schubert # endif
1045796c8dcSSimon Schubert # else
1055796c8dcSSimon Schubert # define gettext(msgid) (msgid)
1065796c8dcSSimon Schubert # endif
1075796c8dcSSimon Schubert
1085796c8dcSSimon Schubert # ifndef gettext_noop
1095796c8dcSSimon Schubert /* This define is so xgettext can find the internationalizable
1105796c8dcSSimon Schubert strings. */
1115796c8dcSSimon Schubert # define gettext_noop(String) String
1125796c8dcSSimon Schubert # endif
1135796c8dcSSimon Schubert
1145796c8dcSSimon Schubert /* The `emacs' switch turns on certain matching commands
1155796c8dcSSimon Schubert that make sense only in Emacs. */
1165796c8dcSSimon Schubert # ifdef emacs
1175796c8dcSSimon Schubert
1185796c8dcSSimon Schubert # include "lisp.h"
1195796c8dcSSimon Schubert # include "buffer.h"
1205796c8dcSSimon Schubert # include "syntax.h"
1215796c8dcSSimon Schubert
1225796c8dcSSimon Schubert # else /* not emacs */
1235796c8dcSSimon Schubert
1245796c8dcSSimon Schubert /* If we are not linking with Emacs proper,
1255796c8dcSSimon Schubert we can't use the relocating allocator
1265796c8dcSSimon Schubert even if config.h says that we can. */
1275796c8dcSSimon Schubert # undef REL_ALLOC
1285796c8dcSSimon Schubert
1295796c8dcSSimon Schubert # if defined STDC_HEADERS || defined _LIBC
1305796c8dcSSimon Schubert # include <stdlib.h>
1315796c8dcSSimon Schubert # else
1325796c8dcSSimon Schubert char *malloc ();
1335796c8dcSSimon Schubert char *realloc ();
1345796c8dcSSimon Schubert # endif
1355796c8dcSSimon Schubert
1365796c8dcSSimon Schubert /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
1375796c8dcSSimon Schubert If nothing else has been done, use the method below. */
1385796c8dcSSimon Schubert # ifdef INHIBIT_STRING_HEADER
1395796c8dcSSimon Schubert # if !(defined HAVE_BZERO && defined HAVE_BCOPY)
1405796c8dcSSimon Schubert # if !defined bzero && !defined bcopy
1415796c8dcSSimon Schubert # undef INHIBIT_STRING_HEADER
1425796c8dcSSimon Schubert # endif
1435796c8dcSSimon Schubert # endif
1445796c8dcSSimon Schubert # endif
1455796c8dcSSimon Schubert
1465796c8dcSSimon Schubert /* This is the normal way of making sure we have a bcopy and a bzero.
1475796c8dcSSimon Schubert This is used in most programs--a few other programs avoid this
1485796c8dcSSimon Schubert by defining INHIBIT_STRING_HEADER. */
1495796c8dcSSimon Schubert # ifndef INHIBIT_STRING_HEADER
1505796c8dcSSimon Schubert # if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
1515796c8dcSSimon Schubert # include <string.h>
1525796c8dcSSimon Schubert # ifndef bzero
1535796c8dcSSimon Schubert # ifndef _LIBC
1545796c8dcSSimon Schubert # define bzero(s, n) (memset (s, '\0', n), (s))
1555796c8dcSSimon Schubert # else
1565796c8dcSSimon Schubert # define bzero(s, n) __bzero (s, n)
1575796c8dcSSimon Schubert # endif
1585796c8dcSSimon Schubert # endif
1595796c8dcSSimon Schubert # else
1605796c8dcSSimon Schubert # include <strings.h>
1615796c8dcSSimon Schubert # ifndef memcmp
1625796c8dcSSimon Schubert # define memcmp(s1, s2, n) bcmp (s1, s2, n)
1635796c8dcSSimon Schubert # endif
1645796c8dcSSimon Schubert # ifndef memcpy
1655796c8dcSSimon Schubert # define memcpy(d, s, n) (bcopy (s, d, n), (d))
1665796c8dcSSimon Schubert # endif
1675796c8dcSSimon Schubert # endif
1685796c8dcSSimon Schubert # endif
1695796c8dcSSimon Schubert
1705796c8dcSSimon Schubert /* Define the syntax stuff for \<, \>, etc. */
1715796c8dcSSimon Schubert
1725796c8dcSSimon Schubert /* This must be nonzero for the wordchar and notwordchar pattern
1735796c8dcSSimon Schubert commands in re_match_2. */
1745796c8dcSSimon Schubert # ifndef Sword
1755796c8dcSSimon Schubert # define Sword 1
1765796c8dcSSimon Schubert # endif
1775796c8dcSSimon Schubert
1785796c8dcSSimon Schubert # ifdef SWITCH_ENUM_BUG
1795796c8dcSSimon Schubert # define SWITCH_ENUM_CAST(x) ((int)(x))
1805796c8dcSSimon Schubert # else
1815796c8dcSSimon Schubert # define SWITCH_ENUM_CAST(x) (x)
1825796c8dcSSimon Schubert # endif
1835796c8dcSSimon Schubert
1845796c8dcSSimon Schubert # endif /* not emacs */
1855796c8dcSSimon Schubert
1865796c8dcSSimon Schubert # if defined _LIBC || HAVE_LIMITS_H
1875796c8dcSSimon Schubert # include <limits.h>
1885796c8dcSSimon Schubert # endif
1895796c8dcSSimon Schubert
1905796c8dcSSimon Schubert # ifndef MB_LEN_MAX
1915796c8dcSSimon Schubert # define MB_LEN_MAX 1
1925796c8dcSSimon Schubert # endif
1935796c8dcSSimon Schubert
1945796c8dcSSimon Schubert /* Get the interface, including the syntax bits. */
1955796c8dcSSimon Schubert # include "xregex.h" /* change for libiberty */
1965796c8dcSSimon Schubert
1975796c8dcSSimon Schubert /* isalpha etc. are used for the character classes. */
1985796c8dcSSimon Schubert # include <ctype.h>
1995796c8dcSSimon Schubert
2005796c8dcSSimon Schubert /* Jim Meyering writes:
2015796c8dcSSimon Schubert
2025796c8dcSSimon Schubert "... Some ctype macros are valid only for character codes that
2035796c8dcSSimon Schubert isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
2045796c8dcSSimon Schubert using /bin/cc or gcc but without giving an ansi option). So, all
2055796c8dcSSimon Schubert ctype uses should be through macros like ISPRINT... If
2065796c8dcSSimon Schubert STDC_HEADERS is defined, then autoconf has verified that the ctype
2075796c8dcSSimon Schubert macros don't need to be guarded with references to isascii. ...
2085796c8dcSSimon Schubert Defining isascii to 1 should let any compiler worth its salt
2095796c8dcSSimon Schubert eliminate the && through constant folding."
2105796c8dcSSimon Schubert Solaris defines some of these symbols so we must undefine them first. */
2115796c8dcSSimon Schubert
2125796c8dcSSimon Schubert # undef ISASCII
2135796c8dcSSimon Schubert # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
2145796c8dcSSimon Schubert # define ISASCII(c) 1
2155796c8dcSSimon Schubert # else
2165796c8dcSSimon Schubert # define ISASCII(c) isascii(c)
2175796c8dcSSimon Schubert # endif
2185796c8dcSSimon Schubert
2195796c8dcSSimon Schubert # ifdef isblank
2205796c8dcSSimon Schubert # define ISBLANK(c) (ISASCII (c) && isblank (c))
2215796c8dcSSimon Schubert # else
2225796c8dcSSimon Schubert # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
2235796c8dcSSimon Schubert # endif
2245796c8dcSSimon Schubert # ifdef isgraph
2255796c8dcSSimon Schubert # define ISGRAPH(c) (ISASCII (c) && isgraph (c))
2265796c8dcSSimon Schubert # else
2275796c8dcSSimon Schubert # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
2285796c8dcSSimon Schubert # endif
2295796c8dcSSimon Schubert
2305796c8dcSSimon Schubert # undef ISPRINT
2315796c8dcSSimon Schubert # define ISPRINT(c) (ISASCII (c) && isprint (c))
2325796c8dcSSimon Schubert # define ISDIGIT(c) (ISASCII (c) && isdigit (c))
2335796c8dcSSimon Schubert # define ISALNUM(c) (ISASCII (c) && isalnum (c))
2345796c8dcSSimon Schubert # define ISALPHA(c) (ISASCII (c) && isalpha (c))
2355796c8dcSSimon Schubert # define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
2365796c8dcSSimon Schubert # define ISLOWER(c) (ISASCII (c) && islower (c))
2375796c8dcSSimon Schubert # define ISPUNCT(c) (ISASCII (c) && ispunct (c))
2385796c8dcSSimon Schubert # define ISSPACE(c) (ISASCII (c) && isspace (c))
2395796c8dcSSimon Schubert # define ISUPPER(c) (ISASCII (c) && isupper (c))
2405796c8dcSSimon Schubert # define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
2415796c8dcSSimon Schubert
2425796c8dcSSimon Schubert # ifdef _tolower
2435796c8dcSSimon Schubert # define TOLOWER(c) _tolower(c)
2445796c8dcSSimon Schubert # else
2455796c8dcSSimon Schubert # define TOLOWER(c) tolower(c)
2465796c8dcSSimon Schubert # endif
2475796c8dcSSimon Schubert
/* Fallback definition for systems whose headers did not already provide
   NULL.  The replacement list is fully parenthesized so that it expands
   safely inside larger expressions (e.g. as an operand of `sizeof').  */
# ifndef NULL
#  define NULL ((void *) 0)
# endif
2515796c8dcSSimon Schubert
2525796c8dcSSimon Schubert /* We remove any previous definition of `SIGN_EXTEND_CHAR',
2535796c8dcSSimon Schubert since ours (we hope) works properly with all combinations of
2545796c8dcSSimon Schubert machines, compilers, `char' and `unsigned char' argument types.
2555796c8dcSSimon Schubert (Per Bothner suggested the basic approach.) */
2565796c8dcSSimon Schubert # undef SIGN_EXTEND_CHAR
2575796c8dcSSimon Schubert # if __STDC__
2585796c8dcSSimon Schubert # define SIGN_EXTEND_CHAR(c) ((signed char) (c))
2595796c8dcSSimon Schubert # else /* not __STDC__ */
2605796c8dcSSimon Schubert /* As in Harbison and Steele. */
2615796c8dcSSimon Schubert # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
2625796c8dcSSimon Schubert # endif
2635796c8dcSSimon Schubert
2645796c8dcSSimon Schubert # ifndef emacs
2655796c8dcSSimon Schubert /* How many characters in the character set. */
2665796c8dcSSimon Schubert # define CHAR_SET_SIZE 256
2675796c8dcSSimon Schubert
2685796c8dcSSimon Schubert # ifdef SYNTAX_TABLE
2695796c8dcSSimon Schubert
2705796c8dcSSimon Schubert extern char *re_syntax_table;
2715796c8dcSSimon Schubert
2725796c8dcSSimon Schubert # else /* not SYNTAX_TABLE */
2735796c8dcSSimon Schubert
2745796c8dcSSimon Schubert static char re_syntax_table[CHAR_SET_SIZE];
2755796c8dcSSimon Schubert
2765796c8dcSSimon Schubert static void init_syntax_once (void);
2775796c8dcSSimon Schubert
2785796c8dcSSimon Schubert static void
init_syntax_once(void)2795796c8dcSSimon Schubert init_syntax_once (void)
2805796c8dcSSimon Schubert {
2815796c8dcSSimon Schubert register int c;
2825796c8dcSSimon Schubert static int done = 0;
2835796c8dcSSimon Schubert
2845796c8dcSSimon Schubert if (done)
2855796c8dcSSimon Schubert return;
2865796c8dcSSimon Schubert bzero (re_syntax_table, sizeof re_syntax_table);
2875796c8dcSSimon Schubert
2885796c8dcSSimon Schubert for (c = 0; c < CHAR_SET_SIZE; ++c)
2895796c8dcSSimon Schubert if (ISALNUM (c))
2905796c8dcSSimon Schubert re_syntax_table[c] = Sword;
2915796c8dcSSimon Schubert
2925796c8dcSSimon Schubert re_syntax_table['_'] = Sword;
2935796c8dcSSimon Schubert
2945796c8dcSSimon Schubert done = 1;
2955796c8dcSSimon Schubert }
2965796c8dcSSimon Schubert
2975796c8dcSSimon Schubert # endif /* not SYNTAX_TABLE */
2985796c8dcSSimon Schubert
2995796c8dcSSimon Schubert # define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
3005796c8dcSSimon Schubert
3015796c8dcSSimon Schubert # endif /* emacs */
3025796c8dcSSimon Schubert
3035796c8dcSSimon Schubert /* Integer type for pointers. */
3045796c8dcSSimon Schubert # if !defined _LIBC && !defined HAVE_UINTPTR_T
3055796c8dcSSimon Schubert typedef unsigned long int uintptr_t;
3065796c8dcSSimon Schubert # endif
3075796c8dcSSimon Schubert
3085796c8dcSSimon Schubert /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
3095796c8dcSSimon Schubert use `alloca' instead of `malloc'. This is because using malloc in
3105796c8dcSSimon Schubert re_search* or re_match* could cause memory leaks when C-g is used in
3115796c8dcSSimon Schubert Emacs; also, malloc is slower and causes storage fragmentation. On
3125796c8dcSSimon Schubert the other hand, malloc is more portable, and easier to debug.
3135796c8dcSSimon Schubert
3145796c8dcSSimon Schubert Because we sometimes use alloca, some routines have to be macros,
3155796c8dcSSimon Schubert not functions -- `alloca'-allocated space disappears at the end of the
3165796c8dcSSimon Schubert function it is called in. */
3175796c8dcSSimon Schubert
3185796c8dcSSimon Schubert # ifdef REGEX_MALLOC
3195796c8dcSSimon Schubert
3205796c8dcSSimon Schubert # define REGEX_ALLOCATE malloc
3215796c8dcSSimon Schubert # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
3225796c8dcSSimon Schubert # define REGEX_FREE free
3235796c8dcSSimon Schubert
3245796c8dcSSimon Schubert # else /* not REGEX_MALLOC */
3255796c8dcSSimon Schubert
3265796c8dcSSimon Schubert /* Emacs already defines alloca, sometimes. */
3275796c8dcSSimon Schubert # ifndef alloca
3285796c8dcSSimon Schubert
3295796c8dcSSimon Schubert /* Make alloca work the best possible way. */
3305796c8dcSSimon Schubert # ifdef __GNUC__
3315796c8dcSSimon Schubert # define alloca __builtin_alloca
3325796c8dcSSimon Schubert # else /* not __GNUC__ */
3335796c8dcSSimon Schubert # if HAVE_ALLOCA_H
3345796c8dcSSimon Schubert # include <alloca.h>
3355796c8dcSSimon Schubert # endif /* HAVE_ALLOCA_H */
3365796c8dcSSimon Schubert # endif /* not __GNUC__ */
3375796c8dcSSimon Schubert
3385796c8dcSSimon Schubert # endif /* not alloca */
3395796c8dcSSimon Schubert
3405796c8dcSSimon Schubert # define REGEX_ALLOCATE alloca
3415796c8dcSSimon Schubert
3425796c8dcSSimon Schubert /* Assumes a `char *destination' variable. */
3435796c8dcSSimon Schubert # define REGEX_REALLOCATE(source, osize, nsize) \
3445796c8dcSSimon Schubert (destination = (char *) alloca (nsize), \
3455796c8dcSSimon Schubert memcpy (destination, source, osize))
3465796c8dcSSimon Schubert
3475796c8dcSSimon Schubert /* No need to do anything to free, after alloca. */
3485796c8dcSSimon Schubert # define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
3495796c8dcSSimon Schubert
3505796c8dcSSimon Schubert # endif /* not REGEX_MALLOC */
3515796c8dcSSimon Schubert
/* Define how to allocate the failure stack.  Three configurations are
   handled: the relocating allocator (REL_ALLOC together with
   REGEX_MALLOC), plain malloc (REGEX_MALLOC alone), and alloca.  */

# if defined REL_ALLOC && defined REGEX_MALLOC

/* Every operation goes through `failure_stack_ptr'; the SOURCE, OSIZE
   and PTR arguments are accepted but not used.  */
#  define REGEX_ALLOCATE_STACK(size) \
  r_alloc (&failure_stack_ptr, (size))
#  define REGEX_REALLOCATE_STACK(source, osize, nsize) \
  r_re_alloc (&failure_stack_ptr, (nsize))
#  define REGEX_FREE_STACK(ptr) \
  r_alloc_free (&failure_stack_ptr)

# else /* not using relocating allocator */

#  ifdef REGEX_MALLOC

#   define REGEX_ALLOCATE_STACK malloc
#   define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
#   define REGEX_FREE_STACK free

#  else /* not REGEX_MALLOC */

#   define REGEX_ALLOCATE_STACK alloca

#   define REGEX_REALLOCATE_STACK(source, osize, nsize) \
  REGEX_REALLOCATE (source, osize, nsize)
/* No need to explicitly free anything after alloca.  Expand to a void
   expression rather than to nothing, matching REGEX_FREE, so the macro
   is valid wherever an expression or statement is expected.  */
#   define REGEX_FREE_STACK(arg) ((void) 0)

#  endif /* not REGEX_MALLOC */
# endif /* not using relocating allocator */
3825796c8dcSSimon Schubert
3835796c8dcSSimon Schubert
/* True if `size1' is nonzero and PTR is pointing anywhere inside
   `string1' or just past its end.  This works if PTR is NULL, which is
   a good thing.  */
3875796c8dcSSimon Schubert # define FIRST_STRING_P(ptr) \
3885796c8dcSSimon Schubert (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
3895796c8dcSSimon Schubert
/* (Re)Allocate N items of type T using malloc.  TALLOC yields the new
   pointer; RETALLOC stores the result of realloc back into ADDR, so when
   realloc fails ADDR becomes NULL and the old block is no longer
   reachable through it.  Callers must check for NULL.  */
# define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))

/* Resize ADDR when it is non-NULL, otherwise allocate it afresh.  The
   body is wrapped in do/while (0) so that the macro behaves as a single
   statement and cannot capture an `else' belonging to the caller.  Use
   it as a statement, followed by a semicolon.  */
# define RETALLOC_IF(addr, n, t) \
  do \
    { \
      if (addr) \
        RETALLOC ((addr), (n), t); \
      else \
        (addr) = TALLOC ((n), t); \
    } \
  while (0)

/* Like TALLOC, but allocates through REGEX_ALLOCATE.  */
# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
3965796c8dcSSimon Schubert
3975796c8dcSSimon Schubert # define BYTEWIDTH 8 /* In bits. */
3985796c8dcSSimon Schubert
3995796c8dcSSimon Schubert # define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
4005796c8dcSSimon Schubert
4015796c8dcSSimon Schubert # undef MAX
4025796c8dcSSimon Schubert # undef MIN
4035796c8dcSSimon Schubert # define MAX(a, b) ((a) > (b) ? (a) : (b))
4045796c8dcSSimon Schubert # define MIN(a, b) ((a) < (b) ? (a) : (b))
4055796c8dcSSimon Schubert
4065796c8dcSSimon Schubert typedef char boolean;
4075796c8dcSSimon Schubert # define false 0
4085796c8dcSSimon Schubert # define true 1
4095796c8dcSSimon Schubert
4105796c8dcSSimon Schubert static reg_errcode_t byte_regex_compile (const char *pattern, size_t size,
4115796c8dcSSimon Schubert reg_syntax_t syntax,
4125796c8dcSSimon Schubert struct re_pattern_buffer *bufp);
4135796c8dcSSimon Schubert
4145796c8dcSSimon Schubert static int byte_re_match_2_internal (struct re_pattern_buffer *bufp,
4155796c8dcSSimon Schubert const char *string1, int size1,
4165796c8dcSSimon Schubert const char *string2, int size2,
4175796c8dcSSimon Schubert int pos,
4185796c8dcSSimon Schubert struct re_registers *regs,
4195796c8dcSSimon Schubert int stop);
4205796c8dcSSimon Schubert static int byte_re_search_2 (struct re_pattern_buffer *bufp,
4215796c8dcSSimon Schubert const char *string1, int size1,
4225796c8dcSSimon Schubert const char *string2, int size2,
4235796c8dcSSimon Schubert int startpos, int range,
4245796c8dcSSimon Schubert struct re_registers *regs, int stop);
4255796c8dcSSimon Schubert static int byte_re_compile_fastmap (struct re_pattern_buffer *bufp);
4265796c8dcSSimon Schubert
4275796c8dcSSimon Schubert #ifdef MBS_SUPPORT
4285796c8dcSSimon Schubert static reg_errcode_t wcs_regex_compile (const char *pattern, size_t size,
4295796c8dcSSimon Schubert reg_syntax_t syntax,
4305796c8dcSSimon Schubert struct re_pattern_buffer *bufp);
4315796c8dcSSimon Schubert
4325796c8dcSSimon Schubert
4335796c8dcSSimon Schubert static int wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
4345796c8dcSSimon Schubert const char *cstring1, int csize1,
4355796c8dcSSimon Schubert const char *cstring2, int csize2,
4365796c8dcSSimon Schubert int pos,
4375796c8dcSSimon Schubert struct re_registers *regs,
4385796c8dcSSimon Schubert int stop,
4395796c8dcSSimon Schubert wchar_t *string1, int size1,
4405796c8dcSSimon Schubert wchar_t *string2, int size2,
4415796c8dcSSimon Schubert int *mbs_offset1, int *mbs_offset2);
4425796c8dcSSimon Schubert static int wcs_re_search_2 (struct re_pattern_buffer *bufp,
4435796c8dcSSimon Schubert const char *string1, int size1,
4445796c8dcSSimon Schubert const char *string2, int size2,
4455796c8dcSSimon Schubert int startpos, int range,
4465796c8dcSSimon Schubert struct re_registers *regs, int stop);
4475796c8dcSSimon Schubert static int wcs_re_compile_fastmap (struct re_pattern_buffer *bufp);
4485796c8dcSSimon Schubert #endif
4495796c8dcSSimon Schubert
4505796c8dcSSimon Schubert /* These are the command codes that appear in compiled regular
4515796c8dcSSimon Schubert expressions. Some opcodes are followed by argument bytes. A
4525796c8dcSSimon Schubert command code can specify any interpretation whatsoever for its
4535796c8dcSSimon Schubert arguments. Zero bytes may appear in the compiled regular expression. */
4545796c8dcSSimon Schubert
4555796c8dcSSimon Schubert typedef enum
4565796c8dcSSimon Schubert {
4575796c8dcSSimon Schubert no_op = 0,
4585796c8dcSSimon Schubert
4595796c8dcSSimon Schubert /* Succeed right away--no more backtracking. */
4605796c8dcSSimon Schubert succeed,
4615796c8dcSSimon Schubert
4625796c8dcSSimon Schubert /* Followed by one byte giving n, then by n literal bytes. */
4635796c8dcSSimon Schubert exactn,
4645796c8dcSSimon Schubert
4655796c8dcSSimon Schubert # ifdef MBS_SUPPORT
4665796c8dcSSimon Schubert /* Same as exactn, but contains binary data. */
4675796c8dcSSimon Schubert exactn_bin,
4685796c8dcSSimon Schubert # endif
4695796c8dcSSimon Schubert
4705796c8dcSSimon Schubert /* Matches any (more or less) character. */
4715796c8dcSSimon Schubert anychar,
4725796c8dcSSimon Schubert
4735796c8dcSSimon Schubert /* Matches any one char belonging to specified set. First
4745796c8dcSSimon Schubert following byte is number of bitmap bytes. Then come bytes
4755796c8dcSSimon Schubert for a bitmap saying which chars are in. Bits in each byte
4765796c8dcSSimon Schubert are ordered low-bit-first. A character is in the set if its
4775796c8dcSSimon Schubert bit is 1. A character too large to have a bit in the map is
4785796c8dcSSimon Schubert automatically not in the set. */
4795796c8dcSSimon Schubert /* ifdef MBS_SUPPORT, following element is length of character
4805796c8dcSSimon Schubert classes, length of collating symbols, length of equivalence
4815796c8dcSSimon Schubert classes, length of character ranges, and length of characters.
4825796c8dcSSimon Schubert Next, character class element, collating symbols elements,
4835796c8dcSSimon Schubert equivalence class elements, range elements, and character
4845796c8dcSSimon Schubert elements follow.
4855796c8dcSSimon Schubert See regex_compile function. */
4865796c8dcSSimon Schubert charset,
4875796c8dcSSimon Schubert
4885796c8dcSSimon Schubert /* Same parameters as charset, but match any character that is
4895796c8dcSSimon Schubert not one of those specified. */
4905796c8dcSSimon Schubert charset_not,
4915796c8dcSSimon Schubert
4925796c8dcSSimon Schubert /* Start remembering the text that is matched, for storing in a
4935796c8dcSSimon Schubert register. Followed by one byte with the register number, in
4945796c8dcSSimon Schubert the range 0 to one less than the pattern buffer's re_nsub
4955796c8dcSSimon Schubert field. Then followed by one byte with the number of groups
4965796c8dcSSimon Schubert inner to this one. (This last has to be part of the
4975796c8dcSSimon Schubert start_memory only because we need it in the on_failure_jump
4985796c8dcSSimon Schubert of re_match_2.) */
4995796c8dcSSimon Schubert start_memory,
5005796c8dcSSimon Schubert
5015796c8dcSSimon Schubert /* Stop remembering the text that is matched and store it in a
5025796c8dcSSimon Schubert memory register. Followed by one byte with the register
5035796c8dcSSimon Schubert number, in the range 0 to one less than `re_nsub' in the
5045796c8dcSSimon Schubert pattern buffer, and one byte with the number of inner groups,
5055796c8dcSSimon Schubert just like `start_memory'. (We need the number of inner
5065796c8dcSSimon Schubert groups here because we don't have any easy way of finding the
5075796c8dcSSimon Schubert corresponding start_memory when we're at a stop_memory.) */
5085796c8dcSSimon Schubert stop_memory,
5095796c8dcSSimon Schubert
5105796c8dcSSimon Schubert /* Match a duplicate of something remembered. Followed by one
5115796c8dcSSimon Schubert byte containing the register number. */
5125796c8dcSSimon Schubert duplicate,
5135796c8dcSSimon Schubert
5145796c8dcSSimon Schubert /* Fail unless at beginning of line. */
5155796c8dcSSimon Schubert begline,
5165796c8dcSSimon Schubert
5175796c8dcSSimon Schubert /* Fail unless at end of line. */
5185796c8dcSSimon Schubert endline,
5195796c8dcSSimon Schubert
5205796c8dcSSimon Schubert /* Succeeds if at beginning of buffer (if emacs) or at beginning
5215796c8dcSSimon Schubert of string to be matched (if not). */
5225796c8dcSSimon Schubert begbuf,
5235796c8dcSSimon Schubert
5245796c8dcSSimon Schubert /* Analogously, for end of buffer/string. */
5255796c8dcSSimon Schubert endbuf,
5265796c8dcSSimon Schubert
5275796c8dcSSimon Schubert /* Followed by two byte relative address to which to jump. */
5285796c8dcSSimon Schubert jump,
5295796c8dcSSimon Schubert
5305796c8dcSSimon Schubert /* Same as jump, but marks the end of an alternative. */
5315796c8dcSSimon Schubert jump_past_alt,
5325796c8dcSSimon Schubert
5335796c8dcSSimon Schubert /* Followed by two-byte relative address of place to resume at
5345796c8dcSSimon Schubert in case of failure. */
5355796c8dcSSimon Schubert /* ifdef MBS_SUPPORT, the size of address is 1. */
5365796c8dcSSimon Schubert on_failure_jump,
5375796c8dcSSimon Schubert
5385796c8dcSSimon Schubert /* Like on_failure_jump, but pushes a placeholder instead of the
5395796c8dcSSimon Schubert current string position when executed. */
5405796c8dcSSimon Schubert on_failure_keep_string_jump,
5415796c8dcSSimon Schubert
5425796c8dcSSimon Schubert /* Throw away latest failure point and then jump to following
5435796c8dcSSimon Schubert two-byte relative address. */
5445796c8dcSSimon Schubert /* ifdef MBS_SUPPORT, the size of address is 1. */
5455796c8dcSSimon Schubert pop_failure_jump,
5465796c8dcSSimon Schubert
5475796c8dcSSimon Schubert /* Change to pop_failure_jump if know won't have to backtrack to
5485796c8dcSSimon Schubert match; otherwise change to jump. This is used to jump
5495796c8dcSSimon Schubert back to the beginning of a repeat. If what follows this jump
5505796c8dcSSimon Schubert clearly won't match what the repeat does, such that we can be
5515796c8dcSSimon Schubert sure that there is no use backtracking out of repetitions
5525796c8dcSSimon Schubert already matched, then we change it to a pop_failure_jump.
5535796c8dcSSimon Schubert Followed by two-byte address. */
5545796c8dcSSimon Schubert /* ifdef MBS_SUPPORT, the size of address is 1. */
5555796c8dcSSimon Schubert maybe_pop_jump,
5565796c8dcSSimon Schubert
5575796c8dcSSimon Schubert /* Jump to following two-byte address, and push a dummy failure
5585796c8dcSSimon Schubert point. This failure point will be thrown away if an attempt
5595796c8dcSSimon Schubert is made to use it for a failure. A `+' construct makes this
5605796c8dcSSimon Schubert before the first repeat. Also used as an intermediary kind
5615796c8dcSSimon Schubert of jump when compiling an alternative. */
5625796c8dcSSimon Schubert /* ifdef MBS_SUPPORT, the size of address is 1. */
5635796c8dcSSimon Schubert dummy_failure_jump,
5645796c8dcSSimon Schubert
5655796c8dcSSimon Schubert /* Push a dummy failure point and continue. Used at the end of
5665796c8dcSSimon Schubert alternatives. */
5675796c8dcSSimon Schubert push_dummy_failure,
5685796c8dcSSimon Schubert
5695796c8dcSSimon Schubert /* Followed by two-byte relative address and two-byte number n.
5705796c8dcSSimon Schubert After matching N times, jump to the address upon failure. */
5715796c8dcSSimon Schubert /* ifdef MBS_SUPPORT, the size of address is 1. */
5725796c8dcSSimon Schubert succeed_n,
5735796c8dcSSimon Schubert
5745796c8dcSSimon Schubert /* Followed by two-byte relative address, and two-byte number n.
5755796c8dcSSimon Schubert Jump to the address N times, then fail. */
5765796c8dcSSimon Schubert /* ifdef MBS_SUPPORT, the size of address is 1. */
5775796c8dcSSimon Schubert jump_n,
5785796c8dcSSimon Schubert
5795796c8dcSSimon Schubert /* Set the following two-byte relative address to the
5805796c8dcSSimon Schubert subsequent two-byte number. The address *includes* the two
5815796c8dcSSimon Schubert bytes of number. */
5825796c8dcSSimon Schubert /* ifdef MBS_SUPPORT, the size of address is 1. */
5835796c8dcSSimon Schubert set_number_at,
5845796c8dcSSimon Schubert
5855796c8dcSSimon Schubert wordchar, /* Matches any word-constituent character. */
5865796c8dcSSimon Schubert notwordchar, /* Matches any char that is not a word-constituent. */
5875796c8dcSSimon Schubert
5885796c8dcSSimon Schubert wordbeg, /* Succeeds if at word beginning. */
5895796c8dcSSimon Schubert wordend, /* Succeeds if at word end. */
5905796c8dcSSimon Schubert
5915796c8dcSSimon Schubert wordbound, /* Succeeds if at a word boundary. */
5925796c8dcSSimon Schubert notwordbound /* Succeeds if not at a word boundary. */
5935796c8dcSSimon Schubert
5945796c8dcSSimon Schubert # ifdef emacs
5955796c8dcSSimon Schubert ,before_dot, /* Succeeds if before point. */
5965796c8dcSSimon Schubert at_dot, /* Succeeds if at point. */
5975796c8dcSSimon Schubert after_dot, /* Succeeds if after point. */
5985796c8dcSSimon Schubert
5995796c8dcSSimon Schubert /* Matches any character whose syntax is specified. Followed by
6005796c8dcSSimon Schubert a byte which contains a syntax code, e.g., Sword. */
6015796c8dcSSimon Schubert syntaxspec,
6025796c8dcSSimon Schubert
6035796c8dcSSimon Schubert /* Matches any character whose syntax is not that specified. */
6045796c8dcSSimon Schubert notsyntaxspec
6055796c8dcSSimon Schubert # endif /* emacs */
6065796c8dcSSimon Schubert } re_opcode_t;
6075796c8dcSSimon Schubert #endif /* not INSIDE_RECURSION */
6085796c8dcSSimon Schubert
6095796c8dcSSimon Schubert
6105796c8dcSSimon Schubert #ifdef BYTE
6115796c8dcSSimon Schubert # define CHAR_T char
6125796c8dcSSimon Schubert # define UCHAR_T unsigned char
6135796c8dcSSimon Schubert # define COMPILED_BUFFER_VAR bufp->buffer
6145796c8dcSSimon Schubert # define OFFSET_ADDRESS_SIZE 2
6155796c8dcSSimon Schubert # define PREFIX(name) byte_##name
6165796c8dcSSimon Schubert # define ARG_PREFIX(name) name
6175796c8dcSSimon Schubert # define PUT_CHAR(c) putchar (c)
6185796c8dcSSimon Schubert #else
6195796c8dcSSimon Schubert # ifdef WCHAR
6205796c8dcSSimon Schubert # define CHAR_T wchar_t
6215796c8dcSSimon Schubert # define UCHAR_T wchar_t
6225796c8dcSSimon Schubert # define COMPILED_BUFFER_VAR wc_buffer
6235796c8dcSSimon Schubert # define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
6245796c8dcSSimon Schubert # define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
6255796c8dcSSimon Schubert # define PREFIX(name) wcs_##name
6265796c8dcSSimon Schubert # define ARG_PREFIX(name) c##name
6275796c8dcSSimon Schubert /* Should we use wide stream?? */
6285796c8dcSSimon Schubert # define PUT_CHAR(c) printf ("%C", c);
6295796c8dcSSimon Schubert # define TRUE 1
6305796c8dcSSimon Schubert # define FALSE 0
6315796c8dcSSimon Schubert # else
6325796c8dcSSimon Schubert # ifdef MBS_SUPPORT
6335796c8dcSSimon Schubert # define WCHAR
6345796c8dcSSimon Schubert # define INSIDE_RECURSION
6355796c8dcSSimon Schubert # include "regex.c"
6365796c8dcSSimon Schubert # undef INSIDE_RECURSION
6375796c8dcSSimon Schubert # endif
6385796c8dcSSimon Schubert # define BYTE
6395796c8dcSSimon Schubert # define INSIDE_RECURSION
6405796c8dcSSimon Schubert # include "regex.c"
6415796c8dcSSimon Schubert # undef INSIDE_RECURSION
6425796c8dcSSimon Schubert # endif
6435796c8dcSSimon Schubert #endif
6445796c8dcSSimon Schubert
6455796c8dcSSimon Schubert #ifdef INSIDE_RECURSION
6465796c8dcSSimon Schubert /* Common operations on the compiled pattern. */
6475796c8dcSSimon Schubert
6485796c8dcSSimon Schubert /* Store NUMBER in two contiguous bytes starting at DESTINATION. */
6495796c8dcSSimon Schubert /* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */
6505796c8dcSSimon Schubert
# ifdef WCHAR
/* Wide build: the number occupies exactly one pattern element.  */
#  define STORE_NUMBER(dst, num) \
  do \
    { \
      (dst)[0] = (UCHAR_T) (num); \
    } \
  while (0)
# else /* BYTE */
/* Byte build: low-order byte first, then the remaining high-order bits.  */
#  define STORE_NUMBER(dst, num) \
  do \
    { \
      *(dst) = (num) & 0xff; \
      *((dst) + 1) = (num) >> 8; \
    } \
  while (0)
# endif /* WCHAR */
6635796c8dcSSimon Schubert
6645796c8dcSSimon Schubert /* Same as STORE_NUMBER, except increment DESTINATION to
6655796c8dcSSimon Schubert the byte after where the number is stored. Therefore, DESTINATION
6665796c8dcSSimon Schubert must be an lvalue. */
6675796c8dcSSimon Schubert /* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */
6685796c8dcSSimon Schubert
/* Store NUM at DST with STORE_NUMBER, then step DST forward by
   OFFSET_ADDRESS_SIZE elements.  DST must be a modifiable lvalue.  */
# define STORE_NUMBER_AND_INCR(dst, num) \
  do \
    { \
      STORE_NUMBER (dst, num); \
      (dst) += OFFSET_ADDRESS_SIZE; \
    } \
  while (0)
6745796c8dcSSimon Schubert
6755796c8dcSSimon Schubert /* Put into DESTINATION a number stored in two contiguous bytes starting
6765796c8dcSSimon Schubert at SOURCE. */
6775796c8dcSSimon Schubert /* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */
6785796c8dcSSimon Schubert
# ifdef WCHAR
/* Wide build: the number is held in the single element at SOURCE.  */
#  define EXTRACT_NUMBER(destination, source) \
  do { \
    (destination) = *(source); \
  } while (0)
# else /* BYTE */
/* Byte build: the element at SOURCE supplies the low eight bits and the
   next element, sign-extended, supplies the high part.  The high part is
   scaled by multiplication rather than `<< 8' because it may be
   negative, and left-shifting a negative value is undefined in ISO C.  */
#  define EXTRACT_NUMBER(destination, source) \
  do { \
    (destination) = *(source) & 0377; \
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * (1 << 8); \
  } while (0)
# endif
6915796c8dcSSimon Schubert
# ifdef DEBUG
/* Function form of EXTRACT_NUMBER: decode the number stored at SOURCE
   into *DEST.  In the WCHAR build the number is the single element at
   SOURCE; otherwise SOURCE[0] supplies the low eight bits and SOURCE[1],
   sign-extended, supplies the high part.  */
static void PREFIX(extract_number) (int *dest, UCHAR_T *source);
static void
PREFIX(extract_number) (int *dest, UCHAR_T *source)
{
#  ifdef WCHAR
  *dest = *source;
#  else /* BYTE */
  int high = SIGN_EXTEND_CHAR (*(source + 1));
  *dest = *source & 0377;
  /* HIGH may be negative; multiply instead of shifting, because
     left-shifting a negative value is undefined in ISO C.  */
  *dest += high * (1 << 8);
#  endif
}

/* Unless EXTRACT_MACROS is defined, route EXTRACT_NUMBER through the
   function above so that it can be stepped through in a debugger.  */
#  ifndef EXTRACT_MACROS /* To debug the macros.  */
#   undef EXTRACT_NUMBER
#   define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&(dest), src)
#  endif /* not EXTRACT_MACROS */

# endif /* DEBUG */
7125796c8dcSSimon Schubert
7135796c8dcSSimon Schubert /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
7145796c8dcSSimon Schubert SOURCE must be an lvalue. */
7155796c8dcSSimon Schubert
/* Decode the number at SRC into DST with EXTRACT_NUMBER, then step SRC
   forward by OFFSET_ADDRESS_SIZE elements.  SRC must be a modifiable
   lvalue.  */
# define EXTRACT_NUMBER_AND_INCR(dst, src) \
  do \
    { \
      EXTRACT_NUMBER (dst, src); \
      (src) += OFFSET_ADDRESS_SIZE; \
    } \
  while (0)
7215796c8dcSSimon Schubert
# ifdef DEBUG
/* Function form of EXTRACT_NUMBER_AND_INCR: decode the number found at
   *SOURCE into *DESTINATION, then move *SOURCE past it by
   OFFSET_ADDRESS_SIZE elements.  */
static void PREFIX(extract_number_and_incr) (int *destination,
                                             UCHAR_T **source);
static void
PREFIX(extract_number_and_incr) (int *destination, UCHAR_T **source)
{
  UCHAR_T *cursor = *source;

  PREFIX(extract_number) (destination, cursor);
  *source = cursor + OFFSET_ADDRESS_SIZE;
}

/* Unless EXTRACT_MACROS is defined, the macro is replaced by a call to
   the function above.  */
#  ifndef EXTRACT_MACROS
#   undef EXTRACT_NUMBER_AND_INCR
#   define EXTRACT_NUMBER_AND_INCR(dest, src) \
  PREFIX(extract_number_and_incr) (&dest, &src)
#  endif /* not EXTRACT_MACROS */

# endif /* DEBUG */
7395796c8dcSSimon Schubert
7405796c8dcSSimon Schubert
7415796c8dcSSimon Schubert
7425796c8dcSSimon Schubert /* If DEBUG is defined, Regex prints many voluminous messages about what
7435796c8dcSSimon Schubert it is doing (if the variable `debug' is nonzero). If linked with the
7445796c8dcSSimon Schubert main program in `iregex.c', you can enter patterns and strings
7455796c8dcSSimon Schubert interactively. And if linked with the main program in `main.c' and
7465796c8dcSSimon Schubert the other test files, you can run the already-written tests. */
7475796c8dcSSimon Schubert
7485796c8dcSSimon Schubert # ifdef DEBUG
7495796c8dcSSimon Schubert
#  ifndef DEFINED_ONCE

/* We use standard I/O for debugging.  */
#   include <stdio.h>

/* It is useful to test things that ``must'' be true when debugging.  */
#   include <assert.h>

/* Debug output is produced only while this is nonzero.  */
static int debug;

/* DEBUG_STATEMENT expands to its argument verbatim.  The DEBUG_PRINTn
   macros call printf with n arguments when `debug' is nonzero.  Each is
   wrapped in do { } while (0) so that it behaves as a single statement:
   a bare `if (debug) ...' expansion would capture the `else' of an
   enclosing unbraced if/else.  */
#   define DEBUG_STATEMENT(e) e
#   define DEBUG_PRINT1(x) \
  do { if (debug) printf (x); } while (0)
#   define DEBUG_PRINT2(x1, x2) \
  do { if (debug) printf (x1, x2); } while (0)
#   define DEBUG_PRINT3(x1, x2, x3) \
  do { if (debug) printf (x1, x2, x3); } while (0)
#   define DEBUG_PRINT4(x1, x2, x3, x4) \
  do { if (debug) printf (x1, x2, x3, x4); } while (0)
#  endif /* not DEFINED_ONCE */
7665796c8dcSSimon Schubert
/* Dump part of a compiled pattern, or a pair of subject strings, when
   `debug' is nonzero.  The P argument of DEBUG_PRINT_COMPILED_PATTERN is
   accepted but not used.  Both macros are wrapped in do { } while (0) so
   that the embedded `if' cannot capture a following `else'.  */
#  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
  do { \
    if (debug) PREFIX(print_partial_compiled_pattern) (s, e); \
  } while (0)
#  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
  do { \
    if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2); \
  } while (0)
7715796c8dcSSimon Schubert
7725796c8dcSSimon Schubert
7735796c8dcSSimon Schubert /* Print the fastmap in human-readable form. */
7745796c8dcSSimon Schubert
7755796c8dcSSimon Schubert # ifndef DEFINED_ONCE
7765796c8dcSSimon Schubert void
print_fastmap(char * fastmap)7775796c8dcSSimon Schubert print_fastmap (char *fastmap)
7785796c8dcSSimon Schubert {
7795796c8dcSSimon Schubert unsigned was_a_range = 0;
7805796c8dcSSimon Schubert unsigned i = 0;
7815796c8dcSSimon Schubert
7825796c8dcSSimon Schubert while (i < (1 << BYTEWIDTH))
7835796c8dcSSimon Schubert {
7845796c8dcSSimon Schubert if (fastmap[i++])
7855796c8dcSSimon Schubert {
7865796c8dcSSimon Schubert was_a_range = 0;
7875796c8dcSSimon Schubert putchar (i - 1);
7885796c8dcSSimon Schubert while (i < (1 << BYTEWIDTH) && fastmap[i])
7895796c8dcSSimon Schubert {
7905796c8dcSSimon Schubert was_a_range = 1;
7915796c8dcSSimon Schubert i++;
7925796c8dcSSimon Schubert }
7935796c8dcSSimon Schubert if (was_a_range)
7945796c8dcSSimon Schubert {
7955796c8dcSSimon Schubert printf ("-");
7965796c8dcSSimon Schubert putchar (i - 1);
7975796c8dcSSimon Schubert }
7985796c8dcSSimon Schubert }
7995796c8dcSSimon Schubert }
8005796c8dcSSimon Schubert putchar ('\n');
8015796c8dcSSimon Schubert }
8025796c8dcSSimon Schubert # endif /* not DEFINED_ONCE */
8035796c8dcSSimon Schubert
8045796c8dcSSimon Schubert
8055796c8dcSSimon Schubert /* Print a compiled pattern string in human-readable form, starting at
8065796c8dcSSimon Schubert the START pointer into it and ending just before the pointer END. */
8075796c8dcSSimon Schubert
8085796c8dcSSimon Schubert void
PREFIX(print_partial_compiled_pattern)8095796c8dcSSimon Schubert PREFIX(print_partial_compiled_pattern) (UCHAR_T *start, UCHAR_T *end)
8105796c8dcSSimon Schubert {
8115796c8dcSSimon Schubert int mcnt, mcnt2;
8125796c8dcSSimon Schubert UCHAR_T *p1;
8135796c8dcSSimon Schubert UCHAR_T *p = start;
8145796c8dcSSimon Schubert UCHAR_T *pend = end;
8155796c8dcSSimon Schubert
8165796c8dcSSimon Schubert if (start == NULL)
8175796c8dcSSimon Schubert {
8185796c8dcSSimon Schubert printf ("(null)\n");
8195796c8dcSSimon Schubert return;
8205796c8dcSSimon Schubert }
8215796c8dcSSimon Schubert
8225796c8dcSSimon Schubert /* Loop over pattern commands. */
8235796c8dcSSimon Schubert while (p < pend)
8245796c8dcSSimon Schubert {
8255796c8dcSSimon Schubert # ifdef _LIBC
8265796c8dcSSimon Schubert printf ("%td:\t", p - start);
8275796c8dcSSimon Schubert # else
8285796c8dcSSimon Schubert printf ("%ld:\t", (long int) (p - start));
8295796c8dcSSimon Schubert # endif
8305796c8dcSSimon Schubert
8315796c8dcSSimon Schubert switch ((re_opcode_t) *p++)
8325796c8dcSSimon Schubert {
8335796c8dcSSimon Schubert case no_op:
8345796c8dcSSimon Schubert printf ("/no_op");
8355796c8dcSSimon Schubert break;
8365796c8dcSSimon Schubert
8375796c8dcSSimon Schubert case exactn:
8385796c8dcSSimon Schubert mcnt = *p++;
8395796c8dcSSimon Schubert printf ("/exactn/%d", mcnt);
8405796c8dcSSimon Schubert do
8415796c8dcSSimon Schubert {
8425796c8dcSSimon Schubert putchar ('/');
8435796c8dcSSimon Schubert PUT_CHAR (*p++);
8445796c8dcSSimon Schubert }
8455796c8dcSSimon Schubert while (--mcnt);
8465796c8dcSSimon Schubert break;
8475796c8dcSSimon Schubert
8485796c8dcSSimon Schubert # ifdef MBS_SUPPORT
8495796c8dcSSimon Schubert case exactn_bin:
8505796c8dcSSimon Schubert mcnt = *p++;
8515796c8dcSSimon Schubert printf ("/exactn_bin/%d", mcnt);
8525796c8dcSSimon Schubert do
8535796c8dcSSimon Schubert {
8545796c8dcSSimon Schubert printf("/%lx", (long int) *p++);
8555796c8dcSSimon Schubert }
8565796c8dcSSimon Schubert while (--mcnt);
8575796c8dcSSimon Schubert break;
8585796c8dcSSimon Schubert # endif /* MBS_SUPPORT */
8595796c8dcSSimon Schubert
8605796c8dcSSimon Schubert case start_memory:
8615796c8dcSSimon Schubert mcnt = *p++;
8625796c8dcSSimon Schubert printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
8635796c8dcSSimon Schubert break;
8645796c8dcSSimon Schubert
8655796c8dcSSimon Schubert case stop_memory:
8665796c8dcSSimon Schubert mcnt = *p++;
8675796c8dcSSimon Schubert printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
8685796c8dcSSimon Schubert break;
8695796c8dcSSimon Schubert
8705796c8dcSSimon Schubert case duplicate:
8715796c8dcSSimon Schubert printf ("/duplicate/%ld", (long int) *p++);
8725796c8dcSSimon Schubert break;
8735796c8dcSSimon Schubert
8745796c8dcSSimon Schubert case anychar:
8755796c8dcSSimon Schubert printf ("/anychar");
8765796c8dcSSimon Schubert break;
8775796c8dcSSimon Schubert
8785796c8dcSSimon Schubert case charset:
8795796c8dcSSimon Schubert case charset_not:
8805796c8dcSSimon Schubert {
8815796c8dcSSimon Schubert # ifdef WCHAR
8825796c8dcSSimon Schubert int i, length;
8835796c8dcSSimon Schubert wchar_t *workp = p;
8845796c8dcSSimon Schubert printf ("/charset [%s",
8855796c8dcSSimon Schubert (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
8865796c8dcSSimon Schubert p += 5;
8875796c8dcSSimon Schubert length = *workp++; /* the length of char_classes */
8885796c8dcSSimon Schubert for (i=0 ; i<length ; i++)
8895796c8dcSSimon Schubert printf("[:%lx:]", (long int) *p++);
8905796c8dcSSimon Schubert length = *workp++; /* the length of collating_symbol */
8915796c8dcSSimon Schubert for (i=0 ; i<length ;)
8925796c8dcSSimon Schubert {
8935796c8dcSSimon Schubert printf("[.");
8945796c8dcSSimon Schubert while(*p != 0)
8955796c8dcSSimon Schubert PUT_CHAR((i++,*p++));
8965796c8dcSSimon Schubert i++,p++;
8975796c8dcSSimon Schubert printf(".]");
8985796c8dcSSimon Schubert }
8995796c8dcSSimon Schubert length = *workp++; /* the length of equivalence_class */
9005796c8dcSSimon Schubert for (i=0 ; i<length ;)
9015796c8dcSSimon Schubert {
9025796c8dcSSimon Schubert printf("[=");
9035796c8dcSSimon Schubert while(*p != 0)
9045796c8dcSSimon Schubert PUT_CHAR((i++,*p++));
9055796c8dcSSimon Schubert i++,p++;
9065796c8dcSSimon Schubert printf("=]");
9075796c8dcSSimon Schubert }
9085796c8dcSSimon Schubert length = *workp++; /* the length of char_range */
9095796c8dcSSimon Schubert for (i=0 ; i<length ; i++)
9105796c8dcSSimon Schubert {
9115796c8dcSSimon Schubert wchar_t range_start = *p++;
9125796c8dcSSimon Schubert wchar_t range_end = *p++;
9135796c8dcSSimon Schubert printf("%C-%C", range_start, range_end);
9145796c8dcSSimon Schubert }
9155796c8dcSSimon Schubert length = *workp++; /* the length of char */
9165796c8dcSSimon Schubert for (i=0 ; i<length ; i++)
9175796c8dcSSimon Schubert printf("%C", *p++);
9185796c8dcSSimon Schubert putchar (']');
9195796c8dcSSimon Schubert # else
9205796c8dcSSimon Schubert register int c, last = -100;
9215796c8dcSSimon Schubert register int in_range = 0;
9225796c8dcSSimon Schubert
9235796c8dcSSimon Schubert printf ("/charset [%s",
9245796c8dcSSimon Schubert (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
9255796c8dcSSimon Schubert
9265796c8dcSSimon Schubert assert (p + *p < pend);
9275796c8dcSSimon Schubert
9285796c8dcSSimon Schubert for (c = 0; c < 256; c++)
9295796c8dcSSimon Schubert if (c / 8 < *p
9305796c8dcSSimon Schubert && (p[1 + (c/8)] & (1 << (c % 8))))
9315796c8dcSSimon Schubert {
9325796c8dcSSimon Schubert /* Are we starting a range? */
9335796c8dcSSimon Schubert if (last + 1 == c && ! in_range)
9345796c8dcSSimon Schubert {
9355796c8dcSSimon Schubert putchar ('-');
9365796c8dcSSimon Schubert in_range = 1;
9375796c8dcSSimon Schubert }
9385796c8dcSSimon Schubert /* Have we broken a range? */
9395796c8dcSSimon Schubert else if (last + 1 != c && in_range)
9405796c8dcSSimon Schubert {
9415796c8dcSSimon Schubert putchar (last);
9425796c8dcSSimon Schubert in_range = 0;
9435796c8dcSSimon Schubert }
9445796c8dcSSimon Schubert
9455796c8dcSSimon Schubert if (! in_range)
9465796c8dcSSimon Schubert putchar (c);
9475796c8dcSSimon Schubert
9485796c8dcSSimon Schubert last = c;
9495796c8dcSSimon Schubert }
9505796c8dcSSimon Schubert
9515796c8dcSSimon Schubert if (in_range)
9525796c8dcSSimon Schubert putchar (last);
9535796c8dcSSimon Schubert
9545796c8dcSSimon Schubert putchar (']');
9555796c8dcSSimon Schubert
9565796c8dcSSimon Schubert p += 1 + *p;
9575796c8dcSSimon Schubert # endif /* WCHAR */
9585796c8dcSSimon Schubert }
9595796c8dcSSimon Schubert break;
9605796c8dcSSimon Schubert
9615796c8dcSSimon Schubert case begline:
9625796c8dcSSimon Schubert printf ("/begline");
9635796c8dcSSimon Schubert break;
9645796c8dcSSimon Schubert
9655796c8dcSSimon Schubert case endline:
9665796c8dcSSimon Schubert printf ("/endline");
9675796c8dcSSimon Schubert break;
9685796c8dcSSimon Schubert
9695796c8dcSSimon Schubert case on_failure_jump:
9705796c8dcSSimon Schubert PREFIX(extract_number_and_incr) (&mcnt, &p);
9715796c8dcSSimon Schubert # ifdef _LIBC
9725796c8dcSSimon Schubert printf ("/on_failure_jump to %td", p + mcnt - start);
9735796c8dcSSimon Schubert # else
9745796c8dcSSimon Schubert printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
9755796c8dcSSimon Schubert # endif
9765796c8dcSSimon Schubert break;
9775796c8dcSSimon Schubert
9785796c8dcSSimon Schubert case on_failure_keep_string_jump:
9795796c8dcSSimon Schubert PREFIX(extract_number_and_incr) (&mcnt, &p);
9805796c8dcSSimon Schubert # ifdef _LIBC
9815796c8dcSSimon Schubert printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
9825796c8dcSSimon Schubert # else
9835796c8dcSSimon Schubert printf ("/on_failure_keep_string_jump to %ld",
9845796c8dcSSimon Schubert (long int) (p + mcnt - start));
9855796c8dcSSimon Schubert # endif
9865796c8dcSSimon Schubert break;
9875796c8dcSSimon Schubert
9885796c8dcSSimon Schubert case dummy_failure_jump:
9895796c8dcSSimon Schubert PREFIX(extract_number_and_incr) (&mcnt, &p);
9905796c8dcSSimon Schubert # ifdef _LIBC
9915796c8dcSSimon Schubert printf ("/dummy_failure_jump to %td", p + mcnt - start);
9925796c8dcSSimon Schubert # else
9935796c8dcSSimon Schubert printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
9945796c8dcSSimon Schubert # endif
9955796c8dcSSimon Schubert break;
9965796c8dcSSimon Schubert
9975796c8dcSSimon Schubert case push_dummy_failure:
9985796c8dcSSimon Schubert printf ("/push_dummy_failure");
9995796c8dcSSimon Schubert break;
10005796c8dcSSimon Schubert
10015796c8dcSSimon Schubert case maybe_pop_jump:
10025796c8dcSSimon Schubert PREFIX(extract_number_and_incr) (&mcnt, &p);
10035796c8dcSSimon Schubert # ifdef _LIBC
10045796c8dcSSimon Schubert printf ("/maybe_pop_jump to %td", p + mcnt - start);
10055796c8dcSSimon Schubert # else
10065796c8dcSSimon Schubert printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
10075796c8dcSSimon Schubert # endif
10085796c8dcSSimon Schubert break;
10095796c8dcSSimon Schubert
10105796c8dcSSimon Schubert case pop_failure_jump:
10115796c8dcSSimon Schubert PREFIX(extract_number_and_incr) (&mcnt, &p);
10125796c8dcSSimon Schubert # ifdef _LIBC
10135796c8dcSSimon Schubert printf ("/pop_failure_jump to %td", p + mcnt - start);
10145796c8dcSSimon Schubert # else
10155796c8dcSSimon Schubert printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
10165796c8dcSSimon Schubert # endif
10175796c8dcSSimon Schubert break;
10185796c8dcSSimon Schubert
10195796c8dcSSimon Schubert case jump_past_alt:
10205796c8dcSSimon Schubert PREFIX(extract_number_and_incr) (&mcnt, &p);
10215796c8dcSSimon Schubert # ifdef _LIBC
10225796c8dcSSimon Schubert printf ("/jump_past_alt to %td", p + mcnt - start);
10235796c8dcSSimon Schubert # else
10245796c8dcSSimon Schubert printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
10255796c8dcSSimon Schubert # endif
10265796c8dcSSimon Schubert break;
10275796c8dcSSimon Schubert
10285796c8dcSSimon Schubert case jump:
10295796c8dcSSimon Schubert PREFIX(extract_number_and_incr) (&mcnt, &p);
10305796c8dcSSimon Schubert # ifdef _LIBC
10315796c8dcSSimon Schubert printf ("/jump to %td", p + mcnt - start);
10325796c8dcSSimon Schubert # else
10335796c8dcSSimon Schubert printf ("/jump to %ld", (long int) (p + mcnt - start));
10345796c8dcSSimon Schubert # endif
10355796c8dcSSimon Schubert break;
10365796c8dcSSimon Schubert
10375796c8dcSSimon Schubert case succeed_n:
10385796c8dcSSimon Schubert PREFIX(extract_number_and_incr) (&mcnt, &p);
10395796c8dcSSimon Schubert p1 = p + mcnt;
10405796c8dcSSimon Schubert PREFIX(extract_number_and_incr) (&mcnt2, &p);
10415796c8dcSSimon Schubert # ifdef _LIBC
10425796c8dcSSimon Schubert printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
10435796c8dcSSimon Schubert # else
10445796c8dcSSimon Schubert printf ("/succeed_n to %ld, %d times",
10455796c8dcSSimon Schubert (long int) (p1 - start), mcnt2);
10465796c8dcSSimon Schubert # endif
10475796c8dcSSimon Schubert break;
10485796c8dcSSimon Schubert
10495796c8dcSSimon Schubert case jump_n:
10505796c8dcSSimon Schubert PREFIX(extract_number_and_incr) (&mcnt, &p);
10515796c8dcSSimon Schubert p1 = p + mcnt;
10525796c8dcSSimon Schubert PREFIX(extract_number_and_incr) (&mcnt2, &p);
10535796c8dcSSimon Schubert printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
10545796c8dcSSimon Schubert break;
10555796c8dcSSimon Schubert
10565796c8dcSSimon Schubert case set_number_at:
10575796c8dcSSimon Schubert PREFIX(extract_number_and_incr) (&mcnt, &p);
10585796c8dcSSimon Schubert p1 = p + mcnt;
10595796c8dcSSimon Schubert PREFIX(extract_number_and_incr) (&mcnt2, &p);
10605796c8dcSSimon Schubert # ifdef _LIBC
10615796c8dcSSimon Schubert printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
10625796c8dcSSimon Schubert # else
10635796c8dcSSimon Schubert printf ("/set_number_at location %ld to %d",
10645796c8dcSSimon Schubert (long int) (p1 - start), mcnt2);
10655796c8dcSSimon Schubert # endif
10665796c8dcSSimon Schubert break;
10675796c8dcSSimon Schubert
10685796c8dcSSimon Schubert case wordbound:
10695796c8dcSSimon Schubert printf ("/wordbound");
10705796c8dcSSimon Schubert break;
10715796c8dcSSimon Schubert
10725796c8dcSSimon Schubert case notwordbound:
10735796c8dcSSimon Schubert printf ("/notwordbound");
10745796c8dcSSimon Schubert break;
10755796c8dcSSimon Schubert
10765796c8dcSSimon Schubert case wordbeg:
10775796c8dcSSimon Schubert printf ("/wordbeg");
10785796c8dcSSimon Schubert break;
10795796c8dcSSimon Schubert
10805796c8dcSSimon Schubert case wordend:
10815796c8dcSSimon Schubert printf ("/wordend");
10825796c8dcSSimon Schubert break;
10835796c8dcSSimon Schubert
10845796c8dcSSimon Schubert # ifdef emacs
10855796c8dcSSimon Schubert case before_dot:
10865796c8dcSSimon Schubert printf ("/before_dot");
10875796c8dcSSimon Schubert break;
10885796c8dcSSimon Schubert
10895796c8dcSSimon Schubert case at_dot:
10905796c8dcSSimon Schubert printf ("/at_dot");
10915796c8dcSSimon Schubert break;
10925796c8dcSSimon Schubert
10935796c8dcSSimon Schubert case after_dot:
10945796c8dcSSimon Schubert printf ("/after_dot");
10955796c8dcSSimon Schubert break;
10965796c8dcSSimon Schubert
10975796c8dcSSimon Schubert case syntaxspec:
10985796c8dcSSimon Schubert printf ("/syntaxspec");
10995796c8dcSSimon Schubert mcnt = *p++;
11005796c8dcSSimon Schubert printf ("/%d", mcnt);
11015796c8dcSSimon Schubert break;
11025796c8dcSSimon Schubert
11035796c8dcSSimon Schubert case notsyntaxspec:
11045796c8dcSSimon Schubert printf ("/notsyntaxspec");
11055796c8dcSSimon Schubert mcnt = *p++;
11065796c8dcSSimon Schubert printf ("/%d", mcnt);
11075796c8dcSSimon Schubert break;
11085796c8dcSSimon Schubert # endif /* emacs */
11095796c8dcSSimon Schubert
11105796c8dcSSimon Schubert case wordchar:
11115796c8dcSSimon Schubert printf ("/wordchar");
11125796c8dcSSimon Schubert break;
11135796c8dcSSimon Schubert
11145796c8dcSSimon Schubert case notwordchar:
11155796c8dcSSimon Schubert printf ("/notwordchar");
11165796c8dcSSimon Schubert break;
11175796c8dcSSimon Schubert
11185796c8dcSSimon Schubert case begbuf:
11195796c8dcSSimon Schubert printf ("/begbuf");
11205796c8dcSSimon Schubert break;
11215796c8dcSSimon Schubert
11225796c8dcSSimon Schubert case endbuf:
11235796c8dcSSimon Schubert printf ("/endbuf");
11245796c8dcSSimon Schubert break;
11255796c8dcSSimon Schubert
11265796c8dcSSimon Schubert default:
11275796c8dcSSimon Schubert printf ("?%ld", (long int) *(p-1));
11285796c8dcSSimon Schubert }
11295796c8dcSSimon Schubert
11305796c8dcSSimon Schubert putchar ('\n');
11315796c8dcSSimon Schubert }
11325796c8dcSSimon Schubert
11335796c8dcSSimon Schubert # ifdef _LIBC
11345796c8dcSSimon Schubert printf ("%td:\tend of pattern.\n", p - start);
11355796c8dcSSimon Schubert # else
11365796c8dcSSimon Schubert printf ("%ld:\tend of pattern.\n", (long int) (p - start));
11375796c8dcSSimon Schubert # endif
11385796c8dcSSimon Schubert }
11395796c8dcSSimon Schubert
11405796c8dcSSimon Schubert
11415796c8dcSSimon Schubert void
PREFIX(print_compiled_pattern)11425796c8dcSSimon Schubert PREFIX(print_compiled_pattern) (struct re_pattern_buffer *bufp)
11435796c8dcSSimon Schubert {
11445796c8dcSSimon Schubert UCHAR_T *buffer = (UCHAR_T*) bufp->buffer;
11455796c8dcSSimon Schubert
11465796c8dcSSimon Schubert PREFIX(print_partial_compiled_pattern) (buffer, buffer
11475796c8dcSSimon Schubert + bufp->used / sizeof(UCHAR_T));
11485796c8dcSSimon Schubert printf ("%ld bytes used/%ld bytes allocated.\n",
11495796c8dcSSimon Schubert bufp->used, bufp->allocated);
11505796c8dcSSimon Schubert
11515796c8dcSSimon Schubert if (bufp->fastmap_accurate && bufp->fastmap)
11525796c8dcSSimon Schubert {
11535796c8dcSSimon Schubert printf ("fastmap: ");
11545796c8dcSSimon Schubert print_fastmap (bufp->fastmap);
11555796c8dcSSimon Schubert }
11565796c8dcSSimon Schubert
11575796c8dcSSimon Schubert # ifdef _LIBC
11585796c8dcSSimon Schubert printf ("re_nsub: %Zd\t", bufp->re_nsub);
11595796c8dcSSimon Schubert # else
11605796c8dcSSimon Schubert printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
11615796c8dcSSimon Schubert # endif
11625796c8dcSSimon Schubert printf ("regs_alloc: %d\t", bufp->regs_allocated);
11635796c8dcSSimon Schubert printf ("can_be_null: %d\t", bufp->can_be_null);
11645796c8dcSSimon Schubert printf ("newline_anchor: %d\n", bufp->newline_anchor);
11655796c8dcSSimon Schubert printf ("no_sub: %d\t", bufp->no_sub);
11665796c8dcSSimon Schubert printf ("not_bol: %d\t", bufp->not_bol);
11675796c8dcSSimon Schubert printf ("not_eol: %d\t", bufp->not_eol);
11685796c8dcSSimon Schubert printf ("syntax: %lx\n", bufp->syntax);
11695796c8dcSSimon Schubert /* Perhaps we should print the translate table? */
11705796c8dcSSimon Schubert }
11715796c8dcSSimon Schubert
11725796c8dcSSimon Schubert
11735796c8dcSSimon Schubert void
PREFIX(print_double_string)11745796c8dcSSimon Schubert PREFIX(print_double_string) (const CHAR_T *where, const CHAR_T *string1,
11755796c8dcSSimon Schubert int size1, const CHAR_T *string2, int size2)
11765796c8dcSSimon Schubert {
11775796c8dcSSimon Schubert int this_char;
11785796c8dcSSimon Schubert
11795796c8dcSSimon Schubert if (where == NULL)
11805796c8dcSSimon Schubert printf ("(null)");
11815796c8dcSSimon Schubert else
11825796c8dcSSimon Schubert {
11835796c8dcSSimon Schubert int cnt;
11845796c8dcSSimon Schubert
11855796c8dcSSimon Schubert if (FIRST_STRING_P (where))
11865796c8dcSSimon Schubert {
11875796c8dcSSimon Schubert for (this_char = where - string1; this_char < size1; this_char++)
11885796c8dcSSimon Schubert PUT_CHAR (string1[this_char]);
11895796c8dcSSimon Schubert
11905796c8dcSSimon Schubert where = string2;
11915796c8dcSSimon Schubert }
11925796c8dcSSimon Schubert
11935796c8dcSSimon Schubert cnt = 0;
11945796c8dcSSimon Schubert for (this_char = where - string2; this_char < size2; this_char++)
11955796c8dcSSimon Schubert {
11965796c8dcSSimon Schubert PUT_CHAR (string2[this_char]);
11975796c8dcSSimon Schubert if (++cnt > 100)
11985796c8dcSSimon Schubert {
11995796c8dcSSimon Schubert fputs ("...", stdout);
12005796c8dcSSimon Schubert break;
12015796c8dcSSimon Schubert }
12025796c8dcSSimon Schubert }
12035796c8dcSSimon Schubert }
12045796c8dcSSimon Schubert }
12055796c8dcSSimon Schubert
12065796c8dcSSimon Schubert # ifndef DEFINED_ONCE
/* Write the single character C to stderr.  */
void
printchar (int c)
{
  fputc (c, stderr);
}
12125796c8dcSSimon Schubert # endif
12135796c8dcSSimon Schubert
12145796c8dcSSimon Schubert # else /* not DEBUG */
12155796c8dcSSimon Schubert
#  ifndef DEFINED_ONCE
/* Without DEBUG, assert and every debugging macro expand to nothing.  */
#   undef assert
#   define assert(cond)

#   define DEBUG_STATEMENT(stmt)
#   define DEBUG_PRINT1(fmt)
#   define DEBUG_PRINT2(fmt, a1)
#   define DEBUG_PRINT3(fmt, a1, a2)
#   define DEBUG_PRINT4(fmt, a1, a2, a3)
#  endif /* not DEFINED_ONCE */
/* These two are defined outside the DEFINED_ONCE guard.  */
#  define DEBUG_PRINT_COMPILED_PATTERN(pat, from, to)
#  define DEBUG_PRINT_DOUBLE_STRING(pos, str1, len1, str2, len2)
12285796c8dcSSimon Schubert
12295796c8dcSSimon Schubert # endif /* not DEBUG */
12305796c8dcSSimon Schubert
12315796c8dcSSimon Schubert
12325796c8dcSSimon Schubert
12335796c8dcSSimon Schubert # ifdef WCHAR
12345796c8dcSSimon Schubert /* This convert a multibyte string to a wide character string.
12355796c8dcSSimon Schubert And write their correspondances to offset_buffer(see below)
12365796c8dcSSimon Schubert and write whether each wchar_t is binary data to is_binary.
12375796c8dcSSimon Schubert This assume invalid multibyte sequences as binary data.
12385796c8dcSSimon Schubert We assume offset_buffer and is_binary is already allocated
12395796c8dcSSimon Schubert enough space. */
12405796c8dcSSimon Schubert
12415796c8dcSSimon Schubert static size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src,
12425796c8dcSSimon Schubert size_t len, int *offset_buffer,
12435796c8dcSSimon Schubert char *is_binary);
12445796c8dcSSimon Schubert static size_t
convert_mbs_to_wcs(CHAR_T * dest,const unsigned char * src,size_t len,int * offset_buffer,char * is_binary)12455796c8dcSSimon Schubert convert_mbs_to_wcs (CHAR_T *dest, const unsigned char*src, size_t len,
12465796c8dcSSimon Schubert int *offset_buffer, char *is_binary)
12475796c8dcSSimon Schubert /* It hold correspondances between src(char string) and
12485796c8dcSSimon Schubert dest(wchar_t string) for optimization.
12495796c8dcSSimon Schubert e.g. src = "xxxyzz"
12505796c8dcSSimon Schubert dest = {'X', 'Y', 'Z'}
12515796c8dcSSimon Schubert (each "xxx", "y" and "zz" represent one multibyte character
12525796c8dcSSimon Schubert corresponding to 'X', 'Y' and 'Z'.)
12535796c8dcSSimon Schubert offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
12545796c8dcSSimon Schubert = {0, 3, 4, 6}
12555796c8dcSSimon Schubert */
12565796c8dcSSimon Schubert {
12575796c8dcSSimon Schubert wchar_t *pdest = dest;
12585796c8dcSSimon Schubert const unsigned char *psrc = src;
12595796c8dcSSimon Schubert size_t wc_count = 0;
12605796c8dcSSimon Schubert
12615796c8dcSSimon Schubert mbstate_t mbs;
12625796c8dcSSimon Schubert int i, consumed;
12635796c8dcSSimon Schubert size_t mb_remain = len;
12645796c8dcSSimon Schubert size_t mb_count = 0;
12655796c8dcSSimon Schubert
12665796c8dcSSimon Schubert /* Initialize the conversion state. */
12675796c8dcSSimon Schubert memset (&mbs, 0, sizeof (mbstate_t));
12685796c8dcSSimon Schubert
12695796c8dcSSimon Schubert offset_buffer[0] = 0;
12705796c8dcSSimon Schubert for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
12715796c8dcSSimon Schubert psrc += consumed)
12725796c8dcSSimon Schubert {
12735796c8dcSSimon Schubert #ifdef _LIBC
12745796c8dcSSimon Schubert consumed = __mbrtowc (pdest, psrc, mb_remain, &mbs);
12755796c8dcSSimon Schubert #else
12765796c8dcSSimon Schubert consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
12775796c8dcSSimon Schubert #endif
12785796c8dcSSimon Schubert
12795796c8dcSSimon Schubert if (consumed <= 0)
12805796c8dcSSimon Schubert /* failed to convert. maybe src contains binary data.
12815796c8dcSSimon Schubert So we consume 1 byte manualy. */
12825796c8dcSSimon Schubert {
12835796c8dcSSimon Schubert *pdest = *psrc;
12845796c8dcSSimon Schubert consumed = 1;
12855796c8dcSSimon Schubert is_binary[wc_count] = TRUE;
12865796c8dcSSimon Schubert }
12875796c8dcSSimon Schubert else
12885796c8dcSSimon Schubert is_binary[wc_count] = FALSE;
12895796c8dcSSimon Schubert /* In sjis encoding, we use yen sign as escape character in
12905796c8dcSSimon Schubert place of reverse solidus. So we convert 0x5c(yen sign in
12915796c8dcSSimon Schubert sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
12925796c8dcSSimon Schubert solidus in UCS2). */
12935796c8dcSSimon Schubert if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
12945796c8dcSSimon Schubert *pdest = (wchar_t) *psrc;
12955796c8dcSSimon Schubert
12965796c8dcSSimon Schubert offset_buffer[wc_count + 1] = mb_count += consumed;
12975796c8dcSSimon Schubert }
12985796c8dcSSimon Schubert
12995796c8dcSSimon Schubert /* Fill remain of the buffer with sentinel. */
13005796c8dcSSimon Schubert for (i = wc_count + 1 ; i <= len ; i++)
13015796c8dcSSimon Schubert offset_buffer[i] = mb_count + 1;
13025796c8dcSSimon Schubert
13035796c8dcSSimon Schubert return wc_count;
13045796c8dcSSimon Schubert }
13055796c8dcSSimon Schubert
13065796c8dcSSimon Schubert # endif /* WCHAR */
13075796c8dcSSimon Schubert
13085796c8dcSSimon Schubert #else /* not INSIDE_RECURSION */
13095796c8dcSSimon Schubert
13105796c8dcSSimon Schubert /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
13115796c8dcSSimon Schubert also be assigned to arbitrarily: each pattern buffer stores its own
13125796c8dcSSimon Schubert syntax, so it can be changed between regex compilations. */
13135796c8dcSSimon Schubert /* This has no initializer because initialized variables in Emacs
13145796c8dcSSimon Schubert become read-only after dumping. */
13155796c8dcSSimon Schubert reg_syntax_t re_syntax_options;
13165796c8dcSSimon Schubert
13175796c8dcSSimon Schubert
13185796c8dcSSimon Schubert /* Specify the precise syntax of regexps for compilation. This provides
13195796c8dcSSimon Schubert for compatibility for various utilities which historically have
13205796c8dcSSimon Schubert different, incompatible syntaxes.
13215796c8dcSSimon Schubert
13225796c8dcSSimon Schubert The argument SYNTAX is a bit mask comprised of the various bits
13235796c8dcSSimon Schubert defined in regex.h. We return the old syntax. */
13245796c8dcSSimon Schubert
13255796c8dcSSimon Schubert reg_syntax_t
re_set_syntax(reg_syntax_t syntax)13265796c8dcSSimon Schubert re_set_syntax (reg_syntax_t syntax)
13275796c8dcSSimon Schubert {
13285796c8dcSSimon Schubert reg_syntax_t ret = re_syntax_options;
13295796c8dcSSimon Schubert
13305796c8dcSSimon Schubert re_syntax_options = syntax;
13315796c8dcSSimon Schubert # ifdef DEBUG
13325796c8dcSSimon Schubert if (syntax & RE_DEBUG)
13335796c8dcSSimon Schubert debug = 1;
13345796c8dcSSimon Schubert else if (debug) /* was on but now is not */
13355796c8dcSSimon Schubert debug = 0;
13365796c8dcSSimon Schubert # endif /* DEBUG */
13375796c8dcSSimon Schubert return ret;
13385796c8dcSSimon Schubert }
13395796c8dcSSimon Schubert # ifdef _LIBC
13405796c8dcSSimon Schubert weak_alias (__re_set_syntax, re_set_syntax)
13415796c8dcSSimon Schubert # endif
13425796c8dcSSimon Schubert
13435796c8dcSSimon Schubert /* This table gives an error message for each of the error codes listed
13445796c8dcSSimon Schubert in regex.h. Obviously the order here has to be same as there.
13455796c8dcSSimon Schubert POSIX doesn't require that we do anything for REG_NOERROR,
13465796c8dcSSimon Schubert but why not be nice? */
13475796c8dcSSimon Schubert
13485796c8dcSSimon Schubert static const char *re_error_msgid[] =
13495796c8dcSSimon Schubert {
13505796c8dcSSimon Schubert gettext_noop ("Success"), /* REG_NOERROR */
13515796c8dcSSimon Schubert gettext_noop ("No match"), /* REG_NOMATCH */
13525796c8dcSSimon Schubert gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
13535796c8dcSSimon Schubert gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
13545796c8dcSSimon Schubert gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
13555796c8dcSSimon Schubert gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
13565796c8dcSSimon Schubert gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
13575796c8dcSSimon Schubert gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
13585796c8dcSSimon Schubert gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
13595796c8dcSSimon Schubert gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
13605796c8dcSSimon Schubert gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
13615796c8dcSSimon Schubert gettext_noop ("Invalid range end"), /* REG_ERANGE */
13625796c8dcSSimon Schubert gettext_noop ("Memory exhausted"), /* REG_ESPACE */
13635796c8dcSSimon Schubert gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
13645796c8dcSSimon Schubert gettext_noop ("Premature end of regular expression"), /* REG_EEND */
13655796c8dcSSimon Schubert gettext_noop ("Regular expression too big"), /* REG_ESIZE */
13665796c8dcSSimon Schubert gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
13675796c8dcSSimon Schubert };
13685796c8dcSSimon Schubert
13695796c8dcSSimon Schubert #endif /* INSIDE_RECURSION */
13705796c8dcSSimon Schubert
13715796c8dcSSimon Schubert #ifndef DEFINED_ONCE
13725796c8dcSSimon Schubert /* Avoiding alloca during matching, to placate r_alloc. */
13735796c8dcSSimon Schubert
13745796c8dcSSimon Schubert /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
13755796c8dcSSimon Schubert searching and matching functions should not call alloca. On some
13765796c8dcSSimon Schubert systems, alloca is implemented in terms of malloc, and if we're
13775796c8dcSSimon Schubert using the relocating allocator routines, then malloc could cause a
13785796c8dcSSimon Schubert relocation, which might (if the strings being searched are in the
13795796c8dcSSimon Schubert ralloc heap) shift the data out from underneath the regexp
13805796c8dcSSimon Schubert routines.
13815796c8dcSSimon Schubert
13825796c8dcSSimon Schubert Here's another reason to avoid allocation: Emacs
13835796c8dcSSimon Schubert processes input from X in a signal handler; processing X input may
13845796c8dcSSimon Schubert call malloc; if input arrives while a matching routine is calling
13855796c8dcSSimon Schubert malloc, then we're scrod. But Emacs can't just block input while
13865796c8dcSSimon Schubert calling matching routines; then we don't notice interrupts when
13875796c8dcSSimon Schubert they come in. So, Emacs blocks input around all regexp calls
13885796c8dcSSimon Schubert except the matching calls, which it leaves unprotected, in the
13895796c8dcSSimon Schubert faith that they will not malloc. */
13905796c8dcSSimon Schubert
13915796c8dcSSimon Schubert /* Normally, this is fine. */
13925796c8dcSSimon Schubert # define MATCH_MAY_ALLOCATE
13935796c8dcSSimon Schubert
13945796c8dcSSimon Schubert /* When using GNU C, we are not REALLY using the C alloca, no matter
13955796c8dcSSimon Schubert what config.h may say. So don't take precautions for it. */
13965796c8dcSSimon Schubert # ifdef __GNUC__
13975796c8dcSSimon Schubert # undef C_ALLOCA
13985796c8dcSSimon Schubert # endif
13995796c8dcSSimon Schubert
14005796c8dcSSimon Schubert /* The match routines may not allocate if (1) they would do it with malloc
14015796c8dcSSimon Schubert and (2) it's not safe for them to use malloc.
14025796c8dcSSimon Schubert Note that if REL_ALLOC is defined, matching would not use malloc for the
14035796c8dcSSimon Schubert failure stack, but we would still use it for the register vectors;
14045796c8dcSSimon Schubert so REL_ALLOC should not affect this. */
14055796c8dcSSimon Schubert # if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
14065796c8dcSSimon Schubert # undef MATCH_MAY_ALLOCATE
14075796c8dcSSimon Schubert # endif
14085796c8dcSSimon Schubert #endif /* not DEFINED_ONCE */
14095796c8dcSSimon Schubert
14105796c8dcSSimon Schubert #ifdef INSIDE_RECURSION
14115796c8dcSSimon Schubert /* Failure stack declarations and macros; both re_compile_fastmap and
14125796c8dcSSimon Schubert re_match_2 use a failure stack. These have to be macros because of
14135796c8dcSSimon Schubert REGEX_ALLOCATE_STACK. */
14145796c8dcSSimon Schubert
14155796c8dcSSimon Schubert
14165796c8dcSSimon Schubert /* Number of failure points for which to initially allocate space
14175796c8dcSSimon Schubert when matching. If this number is exceeded, we allocate more
14185796c8dcSSimon Schubert space, so it is not a hard limit. */
14195796c8dcSSimon Schubert # ifndef INIT_FAILURE_ALLOC
14205796c8dcSSimon Schubert # define INIT_FAILURE_ALLOC 5
14215796c8dcSSimon Schubert # endif
14225796c8dcSSimon Schubert
14235796c8dcSSimon Schubert /* Roughly the maximum number of failure points on the stack. Would be
14245796c8dcSSimon Schubert exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
14255796c8dcSSimon Schubert This is a variable only so users of regex can assign to it; we never
14265796c8dcSSimon Schubert change it ourselves. */
14275796c8dcSSimon Schubert
14285796c8dcSSimon Schubert # ifdef INT_IS_16BIT
14295796c8dcSSimon Schubert
14305796c8dcSSimon Schubert # ifndef DEFINED_ONCE
14315796c8dcSSimon Schubert # if defined MATCH_MAY_ALLOCATE
14325796c8dcSSimon Schubert /* 4400 was enough to cause a crash on Alpha OSF/1,
14335796c8dcSSimon Schubert whose default stack limit is 2mb. */
14345796c8dcSSimon Schubert long int re_max_failures = 4000;
14355796c8dcSSimon Schubert # else
14365796c8dcSSimon Schubert long int re_max_failures = 2000;
14375796c8dcSSimon Schubert # endif
14385796c8dcSSimon Schubert # endif
14395796c8dcSSimon Schubert
PREFIX(fail_stack_elt)14405796c8dcSSimon Schubert union PREFIX(fail_stack_elt)
14415796c8dcSSimon Schubert {
14425796c8dcSSimon Schubert UCHAR_T *pointer;
14435796c8dcSSimon Schubert long int integer;
14445796c8dcSSimon Schubert };
14455796c8dcSSimon Schubert
14465796c8dcSSimon Schubert typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
14475796c8dcSSimon Schubert
14485796c8dcSSimon Schubert typedef struct
14495796c8dcSSimon Schubert {
14505796c8dcSSimon Schubert PREFIX(fail_stack_elt_t) *stack;
14515796c8dcSSimon Schubert unsigned long int size;
14525796c8dcSSimon Schubert unsigned long int avail; /* Offset of next open position. */
14535796c8dcSSimon Schubert } PREFIX(fail_stack_type);
14545796c8dcSSimon Schubert
14555796c8dcSSimon Schubert # else /* not INT_IS_16BIT */
14565796c8dcSSimon Schubert
14575796c8dcSSimon Schubert # ifndef DEFINED_ONCE
14585796c8dcSSimon Schubert # if defined MATCH_MAY_ALLOCATE
14595796c8dcSSimon Schubert /* 4400 was enough to cause a crash on Alpha OSF/1,
14605796c8dcSSimon Schubert whose default stack limit is 2mb. */
14615796c8dcSSimon Schubert int re_max_failures = 4000;
14625796c8dcSSimon Schubert # else
14635796c8dcSSimon Schubert int re_max_failures = 2000;
14645796c8dcSSimon Schubert # endif
14655796c8dcSSimon Schubert # endif
14665796c8dcSSimon Schubert
PREFIX(fail_stack_elt)14675796c8dcSSimon Schubert union PREFIX(fail_stack_elt)
14685796c8dcSSimon Schubert {
14695796c8dcSSimon Schubert UCHAR_T *pointer;
14705796c8dcSSimon Schubert int integer;
14715796c8dcSSimon Schubert };
14725796c8dcSSimon Schubert
14735796c8dcSSimon Schubert typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
14745796c8dcSSimon Schubert
14755796c8dcSSimon Schubert typedef struct
14765796c8dcSSimon Schubert {
14775796c8dcSSimon Schubert PREFIX(fail_stack_elt_t) *stack;
14785796c8dcSSimon Schubert unsigned size;
14795796c8dcSSimon Schubert unsigned avail; /* Offset of next open position. */
14805796c8dcSSimon Schubert } PREFIX(fail_stack_type);
14815796c8dcSSimon Schubert
14825796c8dcSSimon Schubert # endif /* INT_IS_16BIT */
14835796c8dcSSimon Schubert
14845796c8dcSSimon Schubert # ifndef DEFINED_ONCE
14855796c8dcSSimon Schubert # define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
14865796c8dcSSimon Schubert # define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
14875796c8dcSSimon Schubert # define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
14885796c8dcSSimon Schubert # endif
14895796c8dcSSimon Schubert
14905796c8dcSSimon Schubert
14915796c8dcSSimon Schubert /* Define macros to initialize and free the failure stack.
14925796c8dcSSimon Schubert Do `return -2' if the alloc fails. */
14935796c8dcSSimon Schubert
14945796c8dcSSimon Schubert # ifdef MATCH_MAY_ALLOCATE
14955796c8dcSSimon Schubert # define INIT_FAIL_STACK() \
14965796c8dcSSimon Schubert do { \
14975796c8dcSSimon Schubert fail_stack.stack = (PREFIX(fail_stack_elt_t) *) \
14985796c8dcSSimon Schubert REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \
14995796c8dcSSimon Schubert \
15005796c8dcSSimon Schubert if (fail_stack.stack == NULL) \
15015796c8dcSSimon Schubert return -2; \
15025796c8dcSSimon Schubert \
15035796c8dcSSimon Schubert fail_stack.size = INIT_FAILURE_ALLOC; \
15045796c8dcSSimon Schubert fail_stack.avail = 0; \
15055796c8dcSSimon Schubert } while (0)
15065796c8dcSSimon Schubert
15075796c8dcSSimon Schubert # define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
15085796c8dcSSimon Schubert # else
15095796c8dcSSimon Schubert # define INIT_FAIL_STACK() \
15105796c8dcSSimon Schubert do { \
15115796c8dcSSimon Schubert fail_stack.avail = 0; \
15125796c8dcSSimon Schubert } while (0)
15135796c8dcSSimon Schubert
15145796c8dcSSimon Schubert # define RESET_FAIL_STACK()
15155796c8dcSSimon Schubert # endif
15165796c8dcSSimon Schubert
15175796c8dcSSimon Schubert
15185796c8dcSSimon Schubert /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
15195796c8dcSSimon Schubert
15205796c8dcSSimon Schubert Return 1 if succeeds, and 0 if either ran out of memory
15215796c8dcSSimon Schubert allocating space for it or it was already too large.
15225796c8dcSSimon Schubert
15235796c8dcSSimon Schubert REGEX_REALLOCATE_STACK requires `destination' be declared. */
15245796c8dcSSimon Schubert
15255796c8dcSSimon Schubert # define DOUBLE_FAIL_STACK(fail_stack) \
15265796c8dcSSimon Schubert ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
15275796c8dcSSimon Schubert ? 0 \
15285796c8dcSSimon Schubert : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *) \
15295796c8dcSSimon Schubert REGEX_REALLOCATE_STACK ((fail_stack).stack, \
15305796c8dcSSimon Schubert (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)), \
15315796c8dcSSimon Schubert ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\
15325796c8dcSSimon Schubert \
15335796c8dcSSimon Schubert (fail_stack).stack == NULL \
15345796c8dcSSimon Schubert ? 0 \
15355796c8dcSSimon Schubert : ((fail_stack).size <<= 1, \
15365796c8dcSSimon Schubert 1)))
15375796c8dcSSimon Schubert
15385796c8dcSSimon Schubert
15395796c8dcSSimon Schubert /* Push pointer POINTER on FAIL_STACK.
15405796c8dcSSimon Schubert Return 1 if was able to do so and 0 if ran out of memory allocating
15415796c8dcSSimon Schubert space to do so. */
15425796c8dcSSimon Schubert # define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
15435796c8dcSSimon Schubert ((FAIL_STACK_FULL () \
15445796c8dcSSimon Schubert && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
15455796c8dcSSimon Schubert ? 0 \
15465796c8dcSSimon Schubert : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
15475796c8dcSSimon Schubert 1))
15485796c8dcSSimon Schubert
15495796c8dcSSimon Schubert /* Push a pointer value onto the failure stack.
15505796c8dcSSimon Schubert Assumes the variable `fail_stack'. Probably should only
15515796c8dcSSimon Schubert be called from within `PUSH_FAILURE_POINT'. */
15525796c8dcSSimon Schubert # define PUSH_FAILURE_POINTER(item) \
15535796c8dcSSimon Schubert fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item)
15545796c8dcSSimon Schubert
15555796c8dcSSimon Schubert /* This pushes an integer-valued item onto the failure stack.
15565796c8dcSSimon Schubert Assumes the variable `fail_stack'. Probably should only
15575796c8dcSSimon Schubert be called from within `PUSH_FAILURE_POINT'. */
15585796c8dcSSimon Schubert # define PUSH_FAILURE_INT(item) \
15595796c8dcSSimon Schubert fail_stack.stack[fail_stack.avail++].integer = (item)
15605796c8dcSSimon Schubert
15615796c8dcSSimon Schubert /* Push a fail_stack_elt_t value onto the failure stack.
15625796c8dcSSimon Schubert Assumes the variable `fail_stack'. Probably should only
15635796c8dcSSimon Schubert be called from within `PUSH_FAILURE_POINT'. */
15645796c8dcSSimon Schubert # define PUSH_FAILURE_ELT(item) \
15655796c8dcSSimon Schubert fail_stack.stack[fail_stack.avail++] = (item)
15665796c8dcSSimon Schubert
15675796c8dcSSimon Schubert /* These three POP... operations complement the three PUSH... operations.
15685796c8dcSSimon Schubert All assume that `fail_stack' is nonempty. */
15695796c8dcSSimon Schubert # define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
15705796c8dcSSimon Schubert # define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
15715796c8dcSSimon Schubert # define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
15725796c8dcSSimon Schubert
15735796c8dcSSimon Schubert /* Used to omit pushing failure point id's when we're not debugging. */
15745796c8dcSSimon Schubert # ifdef DEBUG
15755796c8dcSSimon Schubert # define DEBUG_PUSH PUSH_FAILURE_INT
15765796c8dcSSimon Schubert # define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
15775796c8dcSSimon Schubert # else
15785796c8dcSSimon Schubert # define DEBUG_PUSH(item)
15795796c8dcSSimon Schubert # define DEBUG_POP(item_addr)
15805796c8dcSSimon Schubert # endif
15815796c8dcSSimon Schubert
15825796c8dcSSimon Schubert
15835796c8dcSSimon Schubert /* Push the information about the state we will need
15845796c8dcSSimon Schubert if we ever fail back to it.
15855796c8dcSSimon Schubert
15865796c8dcSSimon Schubert Requires variables fail_stack, regstart, regend, reg_info, and
15875796c8dcSSimon Schubert num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination'
15885796c8dcSSimon Schubert be declared.
15895796c8dcSSimon Schubert
15905796c8dcSSimon Schubert Does `return FAILURE_CODE' if runs out of memory. */
15915796c8dcSSimon Schubert
15925796c8dcSSimon Schubert # define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
15935796c8dcSSimon Schubert do { \
15945796c8dcSSimon Schubert char *destination; \
15955796c8dcSSimon Schubert /* Must be int, so when we don't save any registers, the arithmetic \
15965796c8dcSSimon Schubert of 0 + -1 isn't done as unsigned. */ \
15975796c8dcSSimon Schubert /* Can't be int, since there is not a shred of a guarantee that int \
15985796c8dcSSimon Schubert is wide enough to hold a value of something to which pointer can \
15995796c8dcSSimon Schubert be assigned */ \
16005796c8dcSSimon Schubert active_reg_t this_reg; \
16015796c8dcSSimon Schubert \
16025796c8dcSSimon Schubert DEBUG_STATEMENT (failure_id++); \
16035796c8dcSSimon Schubert DEBUG_STATEMENT (nfailure_points_pushed++); \
16045796c8dcSSimon Schubert DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
16055796c8dcSSimon Schubert DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
16065796c8dcSSimon Schubert DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
16075796c8dcSSimon Schubert \
16085796c8dcSSimon Schubert DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \
16095796c8dcSSimon Schubert DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
16105796c8dcSSimon Schubert \
16115796c8dcSSimon Schubert /* Ensure we have enough space allocated for what we will push. */ \
16125796c8dcSSimon Schubert while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
16135796c8dcSSimon Schubert { \
16145796c8dcSSimon Schubert if (!DOUBLE_FAIL_STACK (fail_stack)) \
16155796c8dcSSimon Schubert return failure_code; \
16165796c8dcSSimon Schubert \
16175796c8dcSSimon Schubert DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
16185796c8dcSSimon Schubert (fail_stack).size); \
16195796c8dcSSimon Schubert DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
16205796c8dcSSimon Schubert } \
16215796c8dcSSimon Schubert \
16225796c8dcSSimon Schubert /* Push the info, starting with the registers. */ \
16235796c8dcSSimon Schubert DEBUG_PRINT1 ("\n"); \
16245796c8dcSSimon Schubert \
16255796c8dcSSimon Schubert if (1) \
16265796c8dcSSimon Schubert for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
16275796c8dcSSimon Schubert this_reg++) \
16285796c8dcSSimon Schubert { \
16295796c8dcSSimon Schubert DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \
16305796c8dcSSimon Schubert DEBUG_STATEMENT (num_regs_pushed++); \
16315796c8dcSSimon Schubert \
16325796c8dcSSimon Schubert DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
16335796c8dcSSimon Schubert PUSH_FAILURE_POINTER (regstart[this_reg]); \
16345796c8dcSSimon Schubert \
16355796c8dcSSimon Schubert DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
16365796c8dcSSimon Schubert PUSH_FAILURE_POINTER (regend[this_reg]); \
16375796c8dcSSimon Schubert \
16385796c8dcSSimon Schubert DEBUG_PRINT2 (" info: %p\n ", \
16395796c8dcSSimon Schubert reg_info[this_reg].word.pointer); \
16405796c8dcSSimon Schubert DEBUG_PRINT2 (" match_null=%d", \
16415796c8dcSSimon Schubert REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
16425796c8dcSSimon Schubert DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
16435796c8dcSSimon Schubert DEBUG_PRINT2 (" matched_something=%d", \
16445796c8dcSSimon Schubert MATCHED_SOMETHING (reg_info[this_reg])); \
16455796c8dcSSimon Schubert DEBUG_PRINT2 (" ever_matched=%d", \
16465796c8dcSSimon Schubert EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
16475796c8dcSSimon Schubert DEBUG_PRINT1 ("\n"); \
16485796c8dcSSimon Schubert PUSH_FAILURE_ELT (reg_info[this_reg].word); \
16495796c8dcSSimon Schubert } \
16505796c8dcSSimon Schubert \
16515796c8dcSSimon Schubert DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\
16525796c8dcSSimon Schubert PUSH_FAILURE_INT (lowest_active_reg); \
16535796c8dcSSimon Schubert \
16545796c8dcSSimon Schubert DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\
16555796c8dcSSimon Schubert PUSH_FAILURE_INT (highest_active_reg); \
16565796c8dcSSimon Schubert \
16575796c8dcSSimon Schubert DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \
16585796c8dcSSimon Schubert DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
16595796c8dcSSimon Schubert PUSH_FAILURE_POINTER (pattern_place); \
16605796c8dcSSimon Schubert \
16615796c8dcSSimon Schubert DEBUG_PRINT2 (" Pushing string %p: `", string_place); \
16625796c8dcSSimon Schubert DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
16635796c8dcSSimon Schubert size2); \
16645796c8dcSSimon Schubert DEBUG_PRINT1 ("'\n"); \
16655796c8dcSSimon Schubert PUSH_FAILURE_POINTER (string_place); \
16665796c8dcSSimon Schubert \
16675796c8dcSSimon Schubert DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
16685796c8dcSSimon Schubert DEBUG_PUSH (failure_id); \
16695796c8dcSSimon Schubert } while (0)
16705796c8dcSSimon Schubert
16715796c8dcSSimon Schubert # ifndef DEFINED_ONCE
16725796c8dcSSimon Schubert /* This is the number of items that are pushed and popped on the stack
16735796c8dcSSimon Schubert for each register. */
16745796c8dcSSimon Schubert # define NUM_REG_ITEMS 3
16755796c8dcSSimon Schubert
16765796c8dcSSimon Schubert /* Individual items aside from the registers. */
16775796c8dcSSimon Schubert # ifdef DEBUG
16785796c8dcSSimon Schubert # define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
16795796c8dcSSimon Schubert # else
16805796c8dcSSimon Schubert # define NUM_NONREG_ITEMS 4
16815796c8dcSSimon Schubert # endif
16825796c8dcSSimon Schubert
16835796c8dcSSimon Schubert /* We push at most this many items on the stack. */
16845796c8dcSSimon Schubert /* We used to use (num_regs - 1), which is the number of registers
16855796c8dcSSimon Schubert this regexp will save; but that was changed to 5
16865796c8dcSSimon Schubert to avoid stack overflow for a regexp with lots of parens. */
16875796c8dcSSimon Schubert # define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
16885796c8dcSSimon Schubert
16895796c8dcSSimon Schubert /* We actually push this many items. */
16905796c8dcSSimon Schubert # define NUM_FAILURE_ITEMS \
16915796c8dcSSimon Schubert (((0 \
16925796c8dcSSimon Schubert ? 0 : highest_active_reg - lowest_active_reg + 1) \
16935796c8dcSSimon Schubert * NUM_REG_ITEMS) \
16945796c8dcSSimon Schubert + NUM_NONREG_ITEMS)
16955796c8dcSSimon Schubert
16965796c8dcSSimon Schubert /* How many items can still be added to the stack without overflowing it. */
16975796c8dcSSimon Schubert # define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
16985796c8dcSSimon Schubert # endif /* not DEFINED_ONCE */
16995796c8dcSSimon Schubert
17005796c8dcSSimon Schubert
17015796c8dcSSimon Schubert /* Pops what PUSH_FAILURE_POINT pushes.
17025796c8dcSSimon Schubert
17035796c8dcSSimon Schubert We restore into the parameters, all of which should be lvalues:
17045796c8dcSSimon Schubert STR -- the saved data position.
17055796c8dcSSimon Schubert PAT -- the saved pattern position.
17065796c8dcSSimon Schubert LOW_REG, HIGH_REG -- the lowest and highest active registers.
17075796c8dcSSimon Schubert REGSTART, REGEND -- arrays of string positions.
17085796c8dcSSimon Schubert REG_INFO -- array of information about each subexpression.
17095796c8dcSSimon Schubert
17105796c8dcSSimon Schubert Also assumes the variables `fail_stack' and (if debugging), `bufp',
17115796c8dcSSimon Schubert `pend', `string1', `size1', `string2', and `size2'. */
17125796c8dcSSimon Schubert # define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
17135796c8dcSSimon Schubert { \
17145796c8dcSSimon Schubert DEBUG_STATEMENT (unsigned failure_id;) \
17155796c8dcSSimon Schubert active_reg_t this_reg; \
17165796c8dcSSimon Schubert const UCHAR_T *string_temp; \
17175796c8dcSSimon Schubert \
17185796c8dcSSimon Schubert assert (!FAIL_STACK_EMPTY ()); \
17195796c8dcSSimon Schubert \
17205796c8dcSSimon Schubert /* Remove failure points and point to how many regs pushed. */ \
17215796c8dcSSimon Schubert DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
17225796c8dcSSimon Schubert DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
17235796c8dcSSimon Schubert DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
17245796c8dcSSimon Schubert \
17255796c8dcSSimon Schubert assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
17265796c8dcSSimon Schubert \
17275796c8dcSSimon Schubert DEBUG_POP (&failure_id); \
17285796c8dcSSimon Schubert DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
17295796c8dcSSimon Schubert \
17305796c8dcSSimon Schubert /* If the saved string location is NULL, it came from an \
17315796c8dcSSimon Schubert on_failure_keep_string_jump opcode, and we want to throw away the \
17325796c8dcSSimon Schubert saved NULL, thus retaining our current position in the string. */ \
17335796c8dcSSimon Schubert string_temp = POP_FAILURE_POINTER (); \
17345796c8dcSSimon Schubert if (string_temp != NULL) \
17355796c8dcSSimon Schubert str = (const CHAR_T *) string_temp; \
17365796c8dcSSimon Schubert \
17375796c8dcSSimon Schubert DEBUG_PRINT2 (" Popping string %p: `", str); \
17385796c8dcSSimon Schubert DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
17395796c8dcSSimon Schubert DEBUG_PRINT1 ("'\n"); \
17405796c8dcSSimon Schubert \
17415796c8dcSSimon Schubert pat = (UCHAR_T *) POP_FAILURE_POINTER (); \
17425796c8dcSSimon Schubert DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \
17435796c8dcSSimon Schubert DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
17445796c8dcSSimon Schubert \
17455796c8dcSSimon Schubert /* Restore register info. */ \
17465796c8dcSSimon Schubert high_reg = (active_reg_t) POP_FAILURE_INT (); \
17475796c8dcSSimon Schubert DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \
17485796c8dcSSimon Schubert \
17495796c8dcSSimon Schubert low_reg = (active_reg_t) POP_FAILURE_INT (); \
17505796c8dcSSimon Schubert DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \
17515796c8dcSSimon Schubert \
17525796c8dcSSimon Schubert if (1) \
17535796c8dcSSimon Schubert for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
17545796c8dcSSimon Schubert { \
17555796c8dcSSimon Schubert DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \
17565796c8dcSSimon Schubert \
17575796c8dcSSimon Schubert reg_info[this_reg].word = POP_FAILURE_ELT (); \
17585796c8dcSSimon Schubert DEBUG_PRINT2 (" info: %p\n", \
17595796c8dcSSimon Schubert reg_info[this_reg].word.pointer); \
17605796c8dcSSimon Schubert \
17615796c8dcSSimon Schubert regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \
17625796c8dcSSimon Schubert DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
17635796c8dcSSimon Schubert \
17645796c8dcSSimon Schubert regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \
17655796c8dcSSimon Schubert DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
17665796c8dcSSimon Schubert } \
17675796c8dcSSimon Schubert else \
17685796c8dcSSimon Schubert { \
17695796c8dcSSimon Schubert for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
17705796c8dcSSimon Schubert { \
17715796c8dcSSimon Schubert reg_info[this_reg].word.integer = 0; \
17725796c8dcSSimon Schubert regend[this_reg] = 0; \
17735796c8dcSSimon Schubert regstart[this_reg] = 0; \
17745796c8dcSSimon Schubert } \
17755796c8dcSSimon Schubert highest_active_reg = high_reg; \
17765796c8dcSSimon Schubert } \
17775796c8dcSSimon Schubert \
17785796c8dcSSimon Schubert set_regs_matched_done = 0; \
17795796c8dcSSimon Schubert DEBUG_STATEMENT (nfailure_points_popped++); \
17805796c8dcSSimon Schubert } /* POP_FAILURE_POINT */
17815796c8dcSSimon Schubert
17825796c8dcSSimon Schubert /* Structure for per-register (a.k.a. per-group) information.
17835796c8dcSSimon Schubert Other register information, such as the
17845796c8dcSSimon Schubert starting and ending positions (which are addresses), and the list of
17855796c8dcSSimon Schubert inner groups (which is a bits list) are maintained in separate
17865796c8dcSSimon Schubert variables.
17875796c8dcSSimon Schubert
17885796c8dcSSimon Schubert We are making a (strictly speaking) nonportable assumption here: that
17895796c8dcSSimon Schubert the compiler will pack our bit fields into something that fits into
17905796c8dcSSimon Schubert the type of `word', i.e., is something that fits into one item on the
17915796c8dcSSimon Schubert failure stack. */
17925796c8dcSSimon Schubert
17935796c8dcSSimon Schubert
17945796c8dcSSimon Schubert /* Declarations and macros for re_match_2. */
17955796c8dcSSimon Schubert
17965796c8dcSSimon Schubert typedef union
17975796c8dcSSimon Schubert {
17985796c8dcSSimon Schubert PREFIX(fail_stack_elt_t) word;
17995796c8dcSSimon Schubert struct
18005796c8dcSSimon Schubert {
18015796c8dcSSimon Schubert /* This field is one if this group can match the empty string,
18025796c8dcSSimon Schubert zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
18035796c8dcSSimon Schubert # define MATCH_NULL_UNSET_VALUE 3
18045796c8dcSSimon Schubert unsigned match_null_string_p : 2;
18055796c8dcSSimon Schubert unsigned is_active : 1;
18065796c8dcSSimon Schubert unsigned matched_something : 1;
18075796c8dcSSimon Schubert unsigned ever_matched_something : 1;
18085796c8dcSSimon Schubert } bits;
18095796c8dcSSimon Schubert } PREFIX(register_info_type);
18105796c8dcSSimon Schubert
# ifndef DEFINED_ONCE
/* Accessors for the flag bits of a register_info_type value R.  */
#  define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
#  define IS_ACTIVE(R) ((R).bits.is_active)
#  define MATCHED_SOMETHING(R) ((R).bits.matched_something)
#  define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)


/* To be invoked once a real character has been matched.  Raises both
   the `matched_something' and the `ever_matched_something' flag of
   every register from lowest_active_reg through highest_active_reg.
   The walk is skipped while `set_regs_matched_done' is nonzero, and
   the macro sets that flag itself before walking.  */
#  define SET_REGS_MATCHED() \
  do \
    { \
      if (!set_regs_matched_done) \
        { \
          active_reg_t reg_idx; \
          set_regs_matched_done = 1; \
          for (reg_idx = lowest_active_reg; \
               reg_idx <= highest_active_reg; \
               reg_idx++) \
            { \
              EVER_MATCHED_SOMETHING (reg_info[reg_idx]) = 1; \
              MATCHED_SOMETHING (reg_info[reg_idx]) = 1; \
            } \
        } \
    } \
  while (0)
# endif /* not DEFINED_ONCE */
18385796c8dcSSimon Schubert
18395796c8dcSSimon Schubert /* Registers are set to a sentinel when they haven't yet matched. */
18405796c8dcSSimon Schubert static CHAR_T PREFIX(reg_unset_dummy);
18415796c8dcSSimon Schubert # define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
18425796c8dcSSimon Schubert # define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
18435796c8dcSSimon Schubert
18445796c8dcSSimon Schubert /* Subroutine declarations and macros for regex_compile. */
18455796c8dcSSimon Schubert static void PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg);
18465796c8dcSSimon Schubert static void PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc,
18475796c8dcSSimon Schubert int arg1, int arg2);
18485796c8dcSSimon Schubert static void PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc,
18495796c8dcSSimon Schubert int arg, UCHAR_T *end);
18505796c8dcSSimon Schubert static void PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc,
18515796c8dcSSimon Schubert int arg1, int arg2, UCHAR_T *end);
18525796c8dcSSimon Schubert static boolean PREFIX(at_begline_loc_p) (const CHAR_T *pattern,
18535796c8dcSSimon Schubert const CHAR_T *p,
18545796c8dcSSimon Schubert reg_syntax_t syntax);
18555796c8dcSSimon Schubert static boolean PREFIX(at_endline_loc_p) (const CHAR_T *p,
18565796c8dcSSimon Schubert const CHAR_T *pend,
18575796c8dcSSimon Schubert reg_syntax_t syntax);
18585796c8dcSSimon Schubert # ifdef WCHAR
18595796c8dcSSimon Schubert static reg_errcode_t wcs_compile_range (CHAR_T range_start,
18605796c8dcSSimon Schubert const CHAR_T **p_ptr,
18615796c8dcSSimon Schubert const CHAR_T *pend,
18625796c8dcSSimon Schubert char *translate,
18635796c8dcSSimon Schubert reg_syntax_t syntax,
18645796c8dcSSimon Schubert UCHAR_T *b,
18655796c8dcSSimon Schubert CHAR_T *char_set);
18665796c8dcSSimon Schubert static void insert_space (int num, CHAR_T *loc, CHAR_T *end);
18675796c8dcSSimon Schubert # else /* BYTE */
18685796c8dcSSimon Schubert static reg_errcode_t byte_compile_range (unsigned int range_start,
18695796c8dcSSimon Schubert const char **p_ptr,
18705796c8dcSSimon Schubert const char *pend,
18715796c8dcSSimon Schubert char *translate,
18725796c8dcSSimon Schubert reg_syntax_t syntax,
18735796c8dcSSimon Schubert unsigned char *b);
18745796c8dcSSimon Schubert # endif /* WCHAR */
18755796c8dcSSimon Schubert
/* Read the next character of the uncompiled pattern into C, advancing
   `p', and map it through `translate' when a table is present.  The
   character is converted to an unsigned type first so that it can be
   used as an index into `translate'.  When the pattern is exhausted
   (p == pend) the macro returns REG_EEND from the enclosing function.

   In the WCHAR build only characters <= 0xff are translated: a table
   covering every value of a 4-byte character set such as UCS4 would
   need about 4GB.  */
# ifndef PATFETCH
#  ifdef WCHAR
#   define PATFETCH(c) \
  do \
    { \
      if (p == pend) \
        return REG_EEND; \
      c = (UCHAR_T) *p++; \
      if (translate && (c <= 0xff)) \
        c = (UCHAR_T) translate[c]; \
    } \
  while (0)
#  else /* BYTE */
#   define PATFETCH(c) \
  do \
    { \
      if (p == pend) \
        return REG_EEND; \
      c = (unsigned char) *p++; \
      if (translate) \
        c = (unsigned char) translate[c]; \
    } \
  while (0)
#  endif /* WCHAR */
# endif
18985796c8dcSSimon Schubert
/* Same as PATFETCH but without translation: store the next pattern
   character in C and advance `p', or return REG_EEND from the
   enclosing function when the pattern is exhausted.  */
# define PATFETCH_RAW(c) \
  do \
    { \
      if (p == pend) \
        return REG_EEND; \
      c = (UCHAR_T) *p++; \
    } \
  while (0)

/* Step back over one pattern character.  */
# define PATUNFETCH p--
19085796c8dcSSimon Schubert
19095796c8dcSSimon Schubert
/* Yield translate[D] when a `translate' table is present, otherwise D
   itself.  The subscript is converted to unsigned char because the
   data may be declared as plain `char *' (so that string constants can
   be passed without warnings), and a character used as an array index
   must be unsigned.

   In the WCHAR build only characters <= 0xff go through the table: a
   table covering every value of a 4-byte character set such as UCS4
   would need about 4GB.  */

# ifndef TRANSLATE
#  ifdef WCHAR
#   define TRANSLATE(d) \
  ((translate && ((UCHAR_T) (d)) <= 0xff) \
   ? (char) translate[(unsigned char) (d)] \
   : (d))
#  else /* BYTE */
#   define TRANSLATE(d) \
  (translate \
   ? (char) translate[(unsigned char) (d)] \
   : (char) (d))
#  endif /* WCHAR */
# endif
19285796c8dcSSimon Schubert
19295796c8dcSSimon Schubert
/* Helpers for emitting the compiled pattern into the buffer.  */

/* Size used when the caller hands in a buffer that is not allocated.  */
# define INIT_BUF_SIZE (32 * sizeof(UCHAR_T))

/* Keep calling EXTEND_BUFFER until N more units fit behind `b'.  The
   unit is a byte in the BYTE build and a CHAR_T in the WCHAR build,
   where the distance from the buffer start is measured in bytes and N
   is scaled by sizeof(CHAR_T).  */
# ifdef WCHAR
#  define GET_BUFFER_SPACE(n) \
    while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR \
            + (n)*sizeof(CHAR_T)) > bufp->allocated) \
      EXTEND_BUFFER ()
# else /* BYTE */
#  define GET_BUFFER_SPACE(n) \
    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
      EXTEND_BUFFER ()
# endif /* WCHAR */
19465796c8dcSSimon Schubert
/* Reserve room for one unit and append C to the buffer.  */
# define BUF_PUSH(c) \
  do \
    { \
      GET_BUFFER_SPACE (1); \
      *b++ = (UCHAR_T) (c); \
    } \
  while (0)


/* Reserve room for two units and append C1 followed by C2.  */
# define BUF_PUSH_2(c1, c2) \
  do \
    { \
      GET_BUFFER_SPACE (2); \
      *b++ = (UCHAR_T) (c1); \
      *b++ = (UCHAR_T) (c2); \
    } \
  while (0)


/* Reserve room for three units and append C1, C2 and C3 in order.  */
# define BUF_PUSH_3(c1, c2, c3) \
  do \
    { \
      GET_BUFFER_SPACE (3); \
      *b++ = (UCHAR_T) (c1); \
      *b++ = (UCHAR_T) (c2); \
      *b++ = (UCHAR_T) (c3); \
    } \
  while (0)
19725796c8dcSSimon Schubert
/* Write a jump with opcode OP at LOC whose target is TO.  The stored
   argument is the distance from LOC to TO reduced by
   1 + OFFSET_ADDRESS_SIZE.  */
# define STORE_JUMP(op, loc, to) \
  PREFIX(store_op1) (op, loc, \
                     (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))

/* Same, for a jump that carries the extra argument ARG.  */
# define STORE_JUMP2(op, loc, to, arg) \
  PREFIX(store_op2) (op, loc, \
                     (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), \
                     arg)

/* Inserting counterpart of `STORE_JUMP'; `b' is passed as the end of
   the buffer.  */
# define INSERT_JUMP(op, loc, to) \
  PREFIX(insert_op1) (op, loc, \
                      (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), \
                      b)

/* Inserting counterpart of `STORE_JUMP2'; `b' is passed as the end of
   the buffer.  */
# define INSERT_JUMP2(op, loc, to, arg) \
  PREFIX(insert_op2) (op, loc, \
                      (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), \
                      arg, b)
19905796c8dcSSimon Schubert
19915796c8dcSSimon Schubert /* This is not an arbitrary limit: the arguments which represent offsets
19925796c8dcSSimon Schubert into the pattern are two bytes long. So if 2^16 bytes turns out to
19935796c8dcSSimon Schubert be too small, many things would have to change. */
/* Any other compiler that, like MSC, has an allocation limit below 2^16
   bytes will have to use an approach similar to the one used below for
   MSC and lower MAX_BUF_SIZE a bit.  Otherwise you may end up
   reallocating to 0 bytes, which is not going to work well.
   You have been warned!!  */
# ifndef DEFINED_ONCE
#  if defined _MSC_VER  && !defined WIN32
/* 16-bit Microsoft C caps malloc at roughly 65512 bytes, so stay below
   that.  The size_t cast in REALLOC only silences conversion warnings;
   it is not required.  */
#   define MAX_BUF_SIZE 65500L
#   define REALLOC(p,s) realloc ((p), (size_t) (s))
#  else
#   define MAX_BUF_SIZE (1L << 16)
#   define REALLOC(p,s) realloc ((p), (s))
#  endif

/* Support macros for EXTEND_BUFFER, which doubles the buffer with
   realloc, rebases the pointers into the old block, and reports an
   error once the buffer cannot grow beyond MAX_BUF_SIZE.
   MOVE_BUFFER_POINTER (P) shifts P by `incr';
   ELSE_EXTEND_BUFFER_HIGH_BOUND is empty unless bounded pointers are in
   use, in which case it supplies the `else' branch that refreshes the
   high bounds when the block did not move.  */
#  if __BOUNDED_POINTERS__
#   define SET_HIGH_BOUND(P) \
  (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
#   define MOVE_BUFFER_POINTER(P) \
  (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
#   define ELSE_EXTEND_BUFFER_HIGH_BOUND \
  else \
    { \
      SET_HIGH_BOUND (b); \
      SET_HIGH_BOUND (begalt); \
      if (fixup_alt_jump) \
        SET_HIGH_BOUND (fixup_alt_jump); \
      if (laststart) \
        SET_HIGH_BOUND (laststart); \
      if (pending_exact) \
        SET_HIGH_BOUND (pending_exact); \
    }
#  else
#   define MOVE_BUFFER_POINTER(P) (P) += incr
#   define ELSE_EXTEND_BUFFER_HIGH_BOUND
#  endif
# endif /* not DEFINED_ONCE */
20365796c8dcSSimon Schubert
/* Grow the compiled-pattern buffer: double bufp->allocated (capped at
   MAX_BUF_SIZE), reallocate the buffer, and rebase every pointer that
   pointed into the old block (b, begalt, fixup_alt_jump, laststart and
   pending_exact).

   The macro expands to a `return' from the enclosing function in two
   cases:
     REG_ESIZE   the buffer cannot grow any further;
     REG_ESPACE  the reallocation failed.
   When the reallocation fails the old block is still allocated, so the
   previous buffer pointer and the previous `allocated' value are put
   back.  Overwriting the pointer with NULL would leak the old block and
   leave `allocated' describing storage that does not exist.

   NOTE(review): these early returns do not go through
   FREE_STACK_RETURN -- confirm whether the callers rely on that.  */
# ifdef WCHAR
#  define EXTEND_BUFFER() \
  do { \
    UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \
    unsigned long old_allocated = bufp->allocated; \
    int wchar_count; \
    if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE) \
      return REG_ESIZE; \
    bufp->allocated <<= 1; \
    if (bufp->allocated > MAX_BUF_SIZE) \
      bufp->allocated = MAX_BUF_SIZE; \
    /* How many characters can the new buffer hold?  */ \
    wchar_count = bufp->allocated / sizeof(UCHAR_T); \
    if (wchar_count == 0) wchar_count = 1; \
    /* Round the size down to a whole number of characters.  */ \
    bufp->allocated = wchar_count * sizeof(UCHAR_T); \
    RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T); \
    if (COMPILED_BUFFER_VAR == NULL) \
      { \
        /* Allocation failed: keep the old block and its size; \
           bufp->buffer has not been touched yet.  */ \
        COMPILED_BUFFER_VAR = old_buffer; \
        bufp->allocated = old_allocated; \
        return REG_ESPACE; \
      } \
    bufp->buffer = (char*)COMPILED_BUFFER_VAR; \
    /* If the buffer moved, move all the pointers into it.  */ \
    if (old_buffer != COMPILED_BUFFER_VAR) \
      { \
        PTR_INT_TYPE incr = COMPILED_BUFFER_VAR - old_buffer; \
        MOVE_BUFFER_POINTER (b); \
        MOVE_BUFFER_POINTER (begalt); \
        if (fixup_alt_jump) \
          MOVE_BUFFER_POINTER (fixup_alt_jump); \
        if (laststart) \
          MOVE_BUFFER_POINTER (laststart); \
        if (pending_exact) \
          MOVE_BUFFER_POINTER (pending_exact); \
      } \
    ELSE_EXTEND_BUFFER_HIGH_BOUND \
  } while (0)
# else /* BYTE */
#  define EXTEND_BUFFER() \
  do { \
    UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \
    unsigned long old_allocated = bufp->allocated; \
    if (bufp->allocated == MAX_BUF_SIZE) \
      return REG_ESIZE; \
    bufp->allocated <<= 1; \
    if (bufp->allocated > MAX_BUF_SIZE) \
      bufp->allocated = MAX_BUF_SIZE; \
    bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR, \
                                        bufp->allocated); \
    if (COMPILED_BUFFER_VAR == NULL) \
      { \
        /* Allocation failed: the old block is still valid, so put \
           the pointer and the size back before reporting it.  */ \
        bufp->buffer = old_buffer; \
        bufp->allocated = old_allocated; \
        return REG_ESPACE; \
      } \
    /* If the buffer moved, move all the pointers into it.  */ \
    if (old_buffer != COMPILED_BUFFER_VAR) \
      { \
        PTR_INT_TYPE incr = COMPILED_BUFFER_VAR - old_buffer; \
        MOVE_BUFFER_POINTER (b); \
        MOVE_BUFFER_POINTER (begalt); \
        if (fixup_alt_jump) \
          MOVE_BUFFER_POINTER (fixup_alt_jump); \
        if (laststart) \
          MOVE_BUFFER_POINTER (laststart); \
        if (pending_exact) \
          MOVE_BUFFER_POINTER (pending_exact); \
      } \
    ELSE_EXTEND_BUFFER_HIGH_BOUND \
  } while (0)
# endif /* WCHAR */
21005796c8dcSSimon Schubert
# ifndef DEFINED_ONCE
/* The register number argument of {start,stop}_memory occupies a
   single byte, so this is the highest group number that can be
   reported on.  */
#  define MAX_REGNUM 255

/* A pattern may still contain more than `MAX_REGNUM' groups; the
   excess ones are simply ignored.  */
typedef unsigned int regnum_t;


/* Types and macros for the compile stack.  */

/* Offsets run both forwards and backwards, so the type has to cover
   -(MAX_BUF_SIZE - 1) through MAX_BUF_SIZE - 1.  `long' is used because
   `int' may be too narrow when sizeof(int) == 2.  */
typedef long pattern_offset_t;

/* One entry of the compile stack.  */
typedef struct
{
  pattern_offset_t begalt_offset;
  pattern_offset_t fixup_alt_jump;
  pattern_offset_t inner_group_offset;
  pattern_offset_t laststart_offset;
  regnum_t regnum;
} compile_stack_elt_t;


/* The compile stack itself.  */
typedef struct
{
  compile_stack_elt_t *stack;
  unsigned int size;
  unsigned int avail;			/* Offset of next open position.  */
} compile_stack_type;


#  define INIT_COMPILE_STACK_SIZE 32

#  define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
#  define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)

/* The slot at the next open position.  */
#  define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])

# endif /* not DEFINED_ONCE */
21465796c8dcSSimon Schubert
/* Set the bit for character C in the bit list that `b' points to.
   C is parenthesized at both uses so that an expression argument is
   converted to unsigned char as a whole; otherwise the cast would bind
   to the first operand only and the shift count could fall outside
   0 .. BYTEWIDTH - 1.  */
# ifndef DEFINED_ONCE
#  define SET_LIST_BIT(c) \
  (b[((unsigned char) (c)) / BYTEWIDTH] \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
# endif /* DEFINED_ONCE */
21535796c8dcSSimon Schubert
/* Read decimal digits from the uncompiled pattern and accumulate them
   in NUM.  Reading stops at the end of the pattern or right after the
   first non-digit, which has then been fetched into `c'.  A negative
   NUM is reset to zero before the first digit is added, and once NUM
   has grown beyond RE_DUP_MAX further digits are consumed without
   being accumulated.  */
# define GET_UNSIGNED_NUMBER(num) \
  { \
    while (p != pend) \
      { \
        PATFETCH (c); \
        if (!(c >= '0' && c <= '9')) \
          break; \
        if (num > RE_DUP_MAX) \
          continue; \
        if (num < 0) \
          num = 0; \
        num = num * 10 + c - '0'; \
      } \
  }
21705796c8dcSSimon Schubert
# ifndef DEFINED_ONCE
#  if defined _LIBC || WIDE_CHAR_SUPPORT
/* The GNU C library supports user-defined character classes and the
   functions from ISO C amendment 1.  */
#   ifdef CHARCLASS_NAME_MAX
#    define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
#   else
/* This should not happen, but some implementations may still lack the
   constant.  Fall back to a reasonable default.  */
#    define CHAR_CLASS_MAX_LENGTH 256
#   endif

#   ifdef _LIBC
#    define IS_CHAR_CLASS(string) __wctype (string)
#   else
#    define IS_CHAR_CLASS(string) wctype (string)
#   endif
#  else
#   define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'.  */

/* Nonzero when STRING is one of the built-in class names.  */
#   define IS_CHAR_CLASS(string) \
  (STREQ (string, "alpha") \
   || STREQ (string, "upper") \
   || STREQ (string, "lower") \
   || STREQ (string, "digit") \
   || STREQ (string, "alnum") \
   || STREQ (string, "xdigit") \
   || STREQ (string, "space") \
   || STREQ (string, "print") \
   || STREQ (string, "punct") \
   || STREQ (string, "graph") \
   || STREQ (string, "cntrl") \
   || STREQ (string, "blank"))
#  endif
# endif /* DEFINED_ONCE */
22005796c8dcSSimon Schubert
22015796c8dcSSimon Schubert # ifndef MATCH_MAY_ALLOCATE
22025796c8dcSSimon Schubert
22035796c8dcSSimon Schubert /* If we cannot allocate large objects within re_match_2_internal,
22045796c8dcSSimon Schubert we make the fail stack and register vectors global.
22055796c8dcSSimon Schubert The fail stack, we grow to the maximum size when a regexp
22065796c8dcSSimon Schubert is compiled.
22075796c8dcSSimon Schubert The register vectors, we adjust in size each time we
22085796c8dcSSimon Schubert compile a regexp, according to the number of registers it needs. */
22095796c8dcSSimon Schubert
22105796c8dcSSimon Schubert static PREFIX(fail_stack_type) fail_stack;
22115796c8dcSSimon Schubert
22125796c8dcSSimon Schubert /* Size with which the following vectors are currently allocated.
22135796c8dcSSimon Schubert That is so we can make them bigger as needed,
22145796c8dcSSimon Schubert but never make them smaller. */
22155796c8dcSSimon Schubert # ifdef DEFINED_ONCE
22165796c8dcSSimon Schubert static int regs_allocated_size;
22175796c8dcSSimon Schubert
22185796c8dcSSimon Schubert static const char ** regstart, ** regend;
22195796c8dcSSimon Schubert static const char ** old_regstart, ** old_regend;
22205796c8dcSSimon Schubert static const char **best_regstart, **best_regend;
22215796c8dcSSimon Schubert static const char **reg_dummy;
22225796c8dcSSimon Schubert # endif /* DEFINED_ONCE */
22235796c8dcSSimon Schubert
22245796c8dcSSimon Schubert static PREFIX(register_info_type) *PREFIX(reg_info);
22255796c8dcSSimon Schubert static PREFIX(register_info_type) *PREFIX(reg_info_dummy);
22265796c8dcSSimon Schubert
22275796c8dcSSimon Schubert /* Make the register vectors big enough for NUM_REGS registers,
22285796c8dcSSimon Schubert but don't make them smaller. */
22295796c8dcSSimon Schubert
22305796c8dcSSimon Schubert static void
PREFIX(regex_grow_registers)22315796c8dcSSimon Schubert PREFIX(regex_grow_registers) (int num_regs)
22325796c8dcSSimon Schubert {
22335796c8dcSSimon Schubert if (num_regs > regs_allocated_size)
22345796c8dcSSimon Schubert {
22355796c8dcSSimon Schubert RETALLOC_IF (regstart, num_regs, const char *);
22365796c8dcSSimon Schubert RETALLOC_IF (regend, num_regs, const char *);
22375796c8dcSSimon Schubert RETALLOC_IF (old_regstart, num_regs, const char *);
22385796c8dcSSimon Schubert RETALLOC_IF (old_regend, num_regs, const char *);
22395796c8dcSSimon Schubert RETALLOC_IF (best_regstart, num_regs, const char *);
22405796c8dcSSimon Schubert RETALLOC_IF (best_regend, num_regs, const char *);
22415796c8dcSSimon Schubert RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
22425796c8dcSSimon Schubert RETALLOC_IF (reg_dummy, num_regs, const char *);
22435796c8dcSSimon Schubert RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
22445796c8dcSSimon Schubert
22455796c8dcSSimon Schubert regs_allocated_size = num_regs;
22465796c8dcSSimon Schubert }
22475796c8dcSSimon Schubert }
22485796c8dcSSimon Schubert
22495796c8dcSSimon Schubert # endif /* not MATCH_MAY_ALLOCATE */
22505796c8dcSSimon Schubert
22515796c8dcSSimon Schubert # ifndef DEFINED_ONCE
22525796c8dcSSimon Schubert static boolean group_in_compile_stack (compile_stack_type compile_stack,
22535796c8dcSSimon Schubert regnum_t regnum);
22545796c8dcSSimon Schubert # endif /* not DEFINED_ONCE */
22555796c8dcSSimon Schubert
22565796c8dcSSimon Schubert /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
22575796c8dcSSimon Schubert Returns one of error codes defined in `regex.h', or zero for success.
22585796c8dcSSimon Schubert
22595796c8dcSSimon Schubert Assumes the `allocated' (and perhaps `buffer') and `translate'
22605796c8dcSSimon Schubert fields are set in BUFP on entry.
22615796c8dcSSimon Schubert
22625796c8dcSSimon Schubert If it succeeds, results are put in BUFP (if it returns an error, the
22635796c8dcSSimon Schubert contents of BUFP are undefined):
22645796c8dcSSimon Schubert `buffer' is the compiled pattern;
22655796c8dcSSimon Schubert `syntax' is set to SYNTAX;
22665796c8dcSSimon Schubert `used' is set to the length of the compiled pattern;
22675796c8dcSSimon Schubert `fastmap_accurate' is zero;
22685796c8dcSSimon Schubert `re_nsub' is the number of subexpressions in PATTERN;
22695796c8dcSSimon Schubert `not_bol' and `not_eol' are zero;
22705796c8dcSSimon Schubert
22715796c8dcSSimon Schubert The `fastmap' and `newline_anchor' fields are neither
22725796c8dcSSimon Schubert examined nor set. */
22735796c8dcSSimon Schubert
/* Release the storage owned by regex_compile and return VALUE from it.
   The compile stack is always freed; the WCHAR build additionally
   frees the wide-character copy of the pattern and its two side
   tables.  */
# ifdef WCHAR
#  define FREE_STACK_RETURN(value) \
  return (free (pattern), \
          free (mbs_offset), \
          free (is_binary), \
          free (compile_stack.stack), \
          value)
# else
#  define FREE_STACK_RETURN(value) \
  return (free (compile_stack.stack), value)
# endif /* WCHAR */
22825796c8dcSSimon Schubert
22835796c8dcSSimon Schubert static reg_errcode_t
PREFIX(regex_compile)22845796c8dcSSimon Schubert PREFIX(regex_compile) (const char *ARG_PREFIX(pattern),
22855796c8dcSSimon Schubert size_t ARG_PREFIX(size), reg_syntax_t syntax,
22865796c8dcSSimon Schubert struct re_pattern_buffer *bufp)
22875796c8dcSSimon Schubert {
22885796c8dcSSimon Schubert /* We fetch characters from PATTERN here. Even though PATTERN is
22895796c8dcSSimon Schubert `char *' (i.e., signed), we declare these variables as unsigned, so
22905796c8dcSSimon Schubert they can be reliably used as array indices. */
22915796c8dcSSimon Schubert register UCHAR_T c, c1;
22925796c8dcSSimon Schubert
22935796c8dcSSimon Schubert #ifdef WCHAR
22945796c8dcSSimon Schubert /* A temporary space to keep wchar_t pattern and compiled pattern. */
22955796c8dcSSimon Schubert CHAR_T *pattern, *COMPILED_BUFFER_VAR;
22965796c8dcSSimon Schubert size_t size;
22975796c8dcSSimon Schubert /* offset buffer for optimization. See convert_mbs_to_wc. */
22985796c8dcSSimon Schubert int *mbs_offset = NULL;
22995796c8dcSSimon Schubert /* Holds whether each wchar_t is binary data or not. */
23005796c8dcSSimon Schubert char *is_binary = NULL;
23015796c8dcSSimon Schubert /* A flag whether exactn is handling binary data or not. */
23025796c8dcSSimon Schubert char is_exactn_bin = FALSE;
23035796c8dcSSimon Schubert #endif /* WCHAR */
23045796c8dcSSimon Schubert
23055796c8dcSSimon Schubert /* A random temporary spot in PATTERN. */
23065796c8dcSSimon Schubert const CHAR_T *p1;
23075796c8dcSSimon Schubert
23085796c8dcSSimon Schubert /* Points to the end of the buffer, where we should append. */
23095796c8dcSSimon Schubert register UCHAR_T *b;
23105796c8dcSSimon Schubert
23115796c8dcSSimon Schubert /* Keeps track of unclosed groups. */
23125796c8dcSSimon Schubert compile_stack_type compile_stack;
23135796c8dcSSimon Schubert
23145796c8dcSSimon Schubert /* Points to the current (ending) position in the pattern. */
23155796c8dcSSimon Schubert #ifdef WCHAR
23165796c8dcSSimon Schubert const CHAR_T *p;
23175796c8dcSSimon Schubert const CHAR_T *pend;
23185796c8dcSSimon Schubert #else /* BYTE */
23195796c8dcSSimon Schubert const CHAR_T *p = pattern;
23205796c8dcSSimon Schubert const CHAR_T *pend = pattern + size;
23215796c8dcSSimon Schubert #endif /* WCHAR */
23225796c8dcSSimon Schubert
23235796c8dcSSimon Schubert /* How to translate the characters in the pattern. */
23245796c8dcSSimon Schubert RE_TRANSLATE_TYPE translate = bufp->translate;
23255796c8dcSSimon Schubert
23265796c8dcSSimon Schubert /* Address of the count-byte of the most recently inserted `exactn'
23275796c8dcSSimon Schubert command. This makes it possible to tell if a new exact-match
23285796c8dcSSimon Schubert character can be added to that command or if the character requires
23295796c8dcSSimon Schubert a new `exactn' command. */
23305796c8dcSSimon Schubert UCHAR_T *pending_exact = 0;
23315796c8dcSSimon Schubert
23325796c8dcSSimon Schubert /* Address of start of the most recently finished expression.
23335796c8dcSSimon Schubert This tells, e.g., postfix * where to find the start of its
23345796c8dcSSimon Schubert operand. Reset at the beginning of groups and alternatives. */
23355796c8dcSSimon Schubert UCHAR_T *laststart = 0;
23365796c8dcSSimon Schubert
23375796c8dcSSimon Schubert /* Address of beginning of regexp, or inside of last group. */
23385796c8dcSSimon Schubert UCHAR_T *begalt;
23395796c8dcSSimon Schubert
23405796c8dcSSimon Schubert /* Address of the place where a forward jump should go to the end of
23415796c8dcSSimon Schubert the containing expression. Each alternative of an `or' -- except the
23425796c8dcSSimon Schubert last -- ends with a forward jump of this sort. */
23435796c8dcSSimon Schubert UCHAR_T *fixup_alt_jump = 0;
23445796c8dcSSimon Schubert
23455796c8dcSSimon Schubert /* Counts open-groups as they are encountered. Remembered for the
23465796c8dcSSimon Schubert matching close-group on the compile stack, so the same register
23475796c8dcSSimon Schubert number is put in the stop_memory as the start_memory. */
23485796c8dcSSimon Schubert regnum_t regnum = 0;
23495796c8dcSSimon Schubert
23505796c8dcSSimon Schubert #ifdef WCHAR
23515796c8dcSSimon Schubert /* Initialize the wchar_t PATTERN and offset_buffer. */
23525796c8dcSSimon Schubert p = pend = pattern = TALLOC(csize + 1, CHAR_T);
23535796c8dcSSimon Schubert mbs_offset = TALLOC(csize + 1, int);
23545796c8dcSSimon Schubert is_binary = TALLOC(csize + 1, char);
23555796c8dcSSimon Schubert if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
23565796c8dcSSimon Schubert {
23575796c8dcSSimon Schubert free(pattern);
23585796c8dcSSimon Schubert free(mbs_offset);
23595796c8dcSSimon Schubert free(is_binary);
23605796c8dcSSimon Schubert return REG_ESPACE;
23615796c8dcSSimon Schubert }
23625796c8dcSSimon Schubert pattern[csize] = L'\0'; /* sentinel */
23635796c8dcSSimon Schubert size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
23645796c8dcSSimon Schubert pend = p + size;
23655796c8dcSSimon Schubert if (size < 0)
23665796c8dcSSimon Schubert {
23675796c8dcSSimon Schubert free(pattern);
23685796c8dcSSimon Schubert free(mbs_offset);
23695796c8dcSSimon Schubert free(is_binary);
23705796c8dcSSimon Schubert return REG_BADPAT;
23715796c8dcSSimon Schubert }
23725796c8dcSSimon Schubert #endif
23735796c8dcSSimon Schubert
23745796c8dcSSimon Schubert #ifdef DEBUG
23755796c8dcSSimon Schubert DEBUG_PRINT1 ("\nCompiling pattern: ");
23765796c8dcSSimon Schubert if (debug)
23775796c8dcSSimon Schubert {
23785796c8dcSSimon Schubert unsigned debug_count;
23795796c8dcSSimon Schubert
23805796c8dcSSimon Schubert for (debug_count = 0; debug_count < size; debug_count++)
23815796c8dcSSimon Schubert PUT_CHAR (pattern[debug_count]);
23825796c8dcSSimon Schubert putchar ('\n');
23835796c8dcSSimon Schubert }
23845796c8dcSSimon Schubert #endif /* DEBUG */
23855796c8dcSSimon Schubert
23865796c8dcSSimon Schubert /* Initialize the compile stack. */
23875796c8dcSSimon Schubert compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
23885796c8dcSSimon Schubert if (compile_stack.stack == NULL)
23895796c8dcSSimon Schubert {
23905796c8dcSSimon Schubert #ifdef WCHAR
23915796c8dcSSimon Schubert free(pattern);
23925796c8dcSSimon Schubert free(mbs_offset);
23935796c8dcSSimon Schubert free(is_binary);
23945796c8dcSSimon Schubert #endif
23955796c8dcSSimon Schubert return REG_ESPACE;
23965796c8dcSSimon Schubert }
23975796c8dcSSimon Schubert
23985796c8dcSSimon Schubert compile_stack.size = INIT_COMPILE_STACK_SIZE;
23995796c8dcSSimon Schubert compile_stack.avail = 0;
24005796c8dcSSimon Schubert
24015796c8dcSSimon Schubert /* Initialize the pattern buffer. */
24025796c8dcSSimon Schubert bufp->syntax = syntax;
24035796c8dcSSimon Schubert bufp->fastmap_accurate = 0;
24045796c8dcSSimon Schubert bufp->not_bol = bufp->not_eol = 0;
24055796c8dcSSimon Schubert
24065796c8dcSSimon Schubert /* Set `used' to zero, so that if we return an error, the pattern
24075796c8dcSSimon Schubert printer (for debugging) will think there's no pattern. We reset it
24085796c8dcSSimon Schubert at the end. */
24095796c8dcSSimon Schubert bufp->used = 0;
24105796c8dcSSimon Schubert
24115796c8dcSSimon Schubert /* Always count groups, whether or not bufp->no_sub is set. */
24125796c8dcSSimon Schubert bufp->re_nsub = 0;
24135796c8dcSSimon Schubert
24145796c8dcSSimon Schubert #if !defined emacs && !defined SYNTAX_TABLE
24155796c8dcSSimon Schubert /* Initialize the syntax table. */
24165796c8dcSSimon Schubert init_syntax_once ();
24175796c8dcSSimon Schubert #endif
24185796c8dcSSimon Schubert
24195796c8dcSSimon Schubert if (bufp->allocated == 0)
24205796c8dcSSimon Schubert {
24215796c8dcSSimon Schubert if (bufp->buffer)
24225796c8dcSSimon Schubert { /* If zero allocated, but buffer is non-null, try to realloc
24235796c8dcSSimon Schubert enough space. This loses if buffer's address is bogus, but
24245796c8dcSSimon Schubert that is the user's responsibility. */
24255796c8dcSSimon Schubert #ifdef WCHAR
24265796c8dcSSimon Schubert /* Free bufp->buffer and allocate an array for wchar_t pattern
24275796c8dcSSimon Schubert buffer. */
24285796c8dcSSimon Schubert free(bufp->buffer);
24295796c8dcSSimon Schubert COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
24305796c8dcSSimon Schubert UCHAR_T);
24315796c8dcSSimon Schubert #else
24325796c8dcSSimon Schubert RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
24335796c8dcSSimon Schubert #endif /* WCHAR */
24345796c8dcSSimon Schubert }
24355796c8dcSSimon Schubert else
24365796c8dcSSimon Schubert { /* Caller did not allocate a buffer. Do it for them. */
24375796c8dcSSimon Schubert COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
24385796c8dcSSimon Schubert UCHAR_T);
24395796c8dcSSimon Schubert }
24405796c8dcSSimon Schubert
24415796c8dcSSimon Schubert if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
24425796c8dcSSimon Schubert #ifdef WCHAR
24435796c8dcSSimon Schubert bufp->buffer = (char*)COMPILED_BUFFER_VAR;
24445796c8dcSSimon Schubert #endif /* WCHAR */
24455796c8dcSSimon Schubert bufp->allocated = INIT_BUF_SIZE;
24465796c8dcSSimon Schubert }
24475796c8dcSSimon Schubert #ifdef WCHAR
24485796c8dcSSimon Schubert else
24495796c8dcSSimon Schubert COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
24505796c8dcSSimon Schubert #endif
24515796c8dcSSimon Schubert
24525796c8dcSSimon Schubert begalt = b = COMPILED_BUFFER_VAR;
24535796c8dcSSimon Schubert
24545796c8dcSSimon Schubert /* Loop through the uncompiled pattern until we're at the end. */
24555796c8dcSSimon Schubert while (p != pend)
24565796c8dcSSimon Schubert {
24575796c8dcSSimon Schubert PATFETCH (c);
24585796c8dcSSimon Schubert
24595796c8dcSSimon Schubert switch (c)
24605796c8dcSSimon Schubert {
24615796c8dcSSimon Schubert case '^':
24625796c8dcSSimon Schubert {
24635796c8dcSSimon Schubert if ( /* If at start of pattern, it's an operator. */
24645796c8dcSSimon Schubert p == pattern + 1
24655796c8dcSSimon Schubert /* If context independent, it's an operator. */
24665796c8dcSSimon Schubert || syntax & RE_CONTEXT_INDEP_ANCHORS
24675796c8dcSSimon Schubert /* Otherwise, depends on what's come before. */
24685796c8dcSSimon Schubert || PREFIX(at_begline_loc_p) (pattern, p, syntax))
24695796c8dcSSimon Schubert BUF_PUSH (begline);
24705796c8dcSSimon Schubert else
24715796c8dcSSimon Schubert goto normal_char;
24725796c8dcSSimon Schubert }
24735796c8dcSSimon Schubert break;
24745796c8dcSSimon Schubert
24755796c8dcSSimon Schubert
24765796c8dcSSimon Schubert case '$':
24775796c8dcSSimon Schubert {
24785796c8dcSSimon Schubert if ( /* If at end of pattern, it's an operator. */
24795796c8dcSSimon Schubert p == pend
24805796c8dcSSimon Schubert /* If context independent, it's an operator. */
24815796c8dcSSimon Schubert || syntax & RE_CONTEXT_INDEP_ANCHORS
24825796c8dcSSimon Schubert /* Otherwise, depends on what's next. */
24835796c8dcSSimon Schubert || PREFIX(at_endline_loc_p) (p, pend, syntax))
24845796c8dcSSimon Schubert BUF_PUSH (endline);
24855796c8dcSSimon Schubert else
24865796c8dcSSimon Schubert goto normal_char;
24875796c8dcSSimon Schubert }
24885796c8dcSSimon Schubert break;
24895796c8dcSSimon Schubert
24905796c8dcSSimon Schubert
24915796c8dcSSimon Schubert case '+':
24925796c8dcSSimon Schubert case '?':
24935796c8dcSSimon Schubert if ((syntax & RE_BK_PLUS_QM)
24945796c8dcSSimon Schubert || (syntax & RE_LIMITED_OPS))
24955796c8dcSSimon Schubert goto normal_char;
24965796c8dcSSimon Schubert handle_plus:
24975796c8dcSSimon Schubert case '*':
24985796c8dcSSimon Schubert /* If there is no previous pattern... */
24995796c8dcSSimon Schubert if (!laststart)
25005796c8dcSSimon Schubert {
25015796c8dcSSimon Schubert if (syntax & RE_CONTEXT_INVALID_OPS)
25025796c8dcSSimon Schubert FREE_STACK_RETURN (REG_BADRPT);
25035796c8dcSSimon Schubert else if (!(syntax & RE_CONTEXT_INDEP_OPS))
25045796c8dcSSimon Schubert goto normal_char;
25055796c8dcSSimon Schubert }
25065796c8dcSSimon Schubert
25075796c8dcSSimon Schubert {
25085796c8dcSSimon Schubert /* Are we optimizing this jump? */
25095796c8dcSSimon Schubert boolean keep_string_p = false;
25105796c8dcSSimon Schubert
25115796c8dcSSimon Schubert /* 1 means zero (many) matches is allowed. */
25125796c8dcSSimon Schubert char zero_times_ok = 0, many_times_ok = 0;
25135796c8dcSSimon Schubert
25145796c8dcSSimon Schubert /* If there is a sequence of repetition chars, collapse it
25155796c8dcSSimon Schubert down to just one (the right one). We can't combine
25165796c8dcSSimon Schubert interval operators with these because of, e.g., `a{2}*',
25175796c8dcSSimon Schubert which should only match an even number of `a's. */
25185796c8dcSSimon Schubert
25195796c8dcSSimon Schubert for (;;)
25205796c8dcSSimon Schubert {
25215796c8dcSSimon Schubert zero_times_ok |= c != '+';
25225796c8dcSSimon Schubert many_times_ok |= c != '?';
25235796c8dcSSimon Schubert
25245796c8dcSSimon Schubert if (p == pend)
25255796c8dcSSimon Schubert break;
25265796c8dcSSimon Schubert
25275796c8dcSSimon Schubert PATFETCH (c);
25285796c8dcSSimon Schubert
25295796c8dcSSimon Schubert if (c == '*'
25305796c8dcSSimon Schubert || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
25315796c8dcSSimon Schubert ;
25325796c8dcSSimon Schubert
25335796c8dcSSimon Schubert else if (syntax & RE_BK_PLUS_QM && c == '\\')
25345796c8dcSSimon Schubert {
25355796c8dcSSimon Schubert if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
25365796c8dcSSimon Schubert
25375796c8dcSSimon Schubert PATFETCH (c1);
25385796c8dcSSimon Schubert if (!(c1 == '+' || c1 == '?'))
25395796c8dcSSimon Schubert {
25405796c8dcSSimon Schubert PATUNFETCH;
25415796c8dcSSimon Schubert PATUNFETCH;
25425796c8dcSSimon Schubert break;
25435796c8dcSSimon Schubert }
25445796c8dcSSimon Schubert
25455796c8dcSSimon Schubert c = c1;
25465796c8dcSSimon Schubert }
25475796c8dcSSimon Schubert else
25485796c8dcSSimon Schubert {
25495796c8dcSSimon Schubert PATUNFETCH;
25505796c8dcSSimon Schubert break;
25515796c8dcSSimon Schubert }
25525796c8dcSSimon Schubert
25535796c8dcSSimon Schubert /* If we get here, we found another repeat character. */
25545796c8dcSSimon Schubert }
25555796c8dcSSimon Schubert
25565796c8dcSSimon Schubert /* Star, etc. applied to an empty pattern is equivalent
25575796c8dcSSimon Schubert to an empty pattern. */
25585796c8dcSSimon Schubert if (!laststart)
25595796c8dcSSimon Schubert break;
25605796c8dcSSimon Schubert
25615796c8dcSSimon Schubert /* Now we know whether or not zero matches is allowed
25625796c8dcSSimon Schubert and also whether or not two or more matches is allowed. */
25635796c8dcSSimon Schubert if (many_times_ok)
25645796c8dcSSimon Schubert { /* More than one repetition is allowed, so put in at the
25655796c8dcSSimon Schubert end a backward relative jump from `b' to before the next
25665796c8dcSSimon Schubert jump we're going to put in below (which jumps from
25675796c8dcSSimon Schubert laststart to after this jump).
25685796c8dcSSimon Schubert
25695796c8dcSSimon Schubert But if we are at the `*' in the exact sequence `.*\n',
25705796c8dcSSimon Schubert insert an unconditional jump backwards to the .,
25715796c8dcSSimon Schubert instead of the beginning of the loop. This way we only
25725796c8dcSSimon Schubert push a failure point once, instead of every time
25735796c8dcSSimon Schubert through the loop. */
25745796c8dcSSimon Schubert assert (p - 1 > pattern);
25755796c8dcSSimon Schubert
25765796c8dcSSimon Schubert /* Allocate the space for the jump. */
25775796c8dcSSimon Schubert GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
25785796c8dcSSimon Schubert
25795796c8dcSSimon Schubert /* We know we are not at the first character of the pattern,
25805796c8dcSSimon Schubert because laststart was nonzero. And we've already
25815796c8dcSSimon Schubert incremented `p', by the way, to be the character after
25825796c8dcSSimon Schubert the `*'. Do we have to do something analogous here
25835796c8dcSSimon Schubert for null bytes, because of RE_DOT_NOT_NULL? */
25845796c8dcSSimon Schubert if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
25855796c8dcSSimon Schubert && zero_times_ok
25865796c8dcSSimon Schubert && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
25875796c8dcSSimon Schubert && !(syntax & RE_DOT_NEWLINE))
25885796c8dcSSimon Schubert { /* We have .*\n. */
25895796c8dcSSimon Schubert STORE_JUMP (jump, b, laststart);
25905796c8dcSSimon Schubert keep_string_p = true;
25915796c8dcSSimon Schubert }
25925796c8dcSSimon Schubert else
25935796c8dcSSimon Schubert /* Anything else. */
25945796c8dcSSimon Schubert STORE_JUMP (maybe_pop_jump, b, laststart -
25955796c8dcSSimon Schubert (1 + OFFSET_ADDRESS_SIZE));
25965796c8dcSSimon Schubert
25975796c8dcSSimon Schubert /* We've added more stuff to the buffer. */
25985796c8dcSSimon Schubert b += 1 + OFFSET_ADDRESS_SIZE;
25995796c8dcSSimon Schubert }
26005796c8dcSSimon Schubert
26015796c8dcSSimon Schubert /* On failure, jump from laststart to b + 3, which will be the
26025796c8dcSSimon Schubert end of the buffer after this jump is inserted. */
26035796c8dcSSimon Schubert /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
26045796c8dcSSimon Schubert 'b + 3'. */
26055796c8dcSSimon Schubert GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
26065796c8dcSSimon Schubert INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
26075796c8dcSSimon Schubert : on_failure_jump,
26085796c8dcSSimon Schubert laststart, b + 1 + OFFSET_ADDRESS_SIZE);
26095796c8dcSSimon Schubert pending_exact = 0;
26105796c8dcSSimon Schubert b += 1 + OFFSET_ADDRESS_SIZE;
26115796c8dcSSimon Schubert
26125796c8dcSSimon Schubert if (!zero_times_ok)
26135796c8dcSSimon Schubert {
26145796c8dcSSimon Schubert /* At least one repetition is required, so insert a
26155796c8dcSSimon Schubert `dummy_failure_jump' before the initial
26165796c8dcSSimon Schubert `on_failure_jump' instruction of the loop. This
26175796c8dcSSimon Schubert effects a skip over that instruction the first time
26185796c8dcSSimon Schubert we hit that loop. */
26195796c8dcSSimon Schubert GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
26205796c8dcSSimon Schubert INSERT_JUMP (dummy_failure_jump, laststart, laststart +
26215796c8dcSSimon Schubert 2 + 2 * OFFSET_ADDRESS_SIZE);
26225796c8dcSSimon Schubert b += 1 + OFFSET_ADDRESS_SIZE;
26235796c8dcSSimon Schubert }
26245796c8dcSSimon Schubert }
26255796c8dcSSimon Schubert break;
26265796c8dcSSimon Schubert
26275796c8dcSSimon Schubert
26285796c8dcSSimon Schubert case '.':
26295796c8dcSSimon Schubert laststart = b;
26305796c8dcSSimon Schubert BUF_PUSH (anychar);
26315796c8dcSSimon Schubert break;
26325796c8dcSSimon Schubert
26335796c8dcSSimon Schubert
26345796c8dcSSimon Schubert case '[':
26355796c8dcSSimon Schubert {
26365796c8dcSSimon Schubert boolean had_char_class = false;
26375796c8dcSSimon Schubert #ifdef WCHAR
26385796c8dcSSimon Schubert CHAR_T range_start = 0xffffffff;
26395796c8dcSSimon Schubert #else
26405796c8dcSSimon Schubert unsigned int range_start = 0xffffffff;
26415796c8dcSSimon Schubert #endif
26425796c8dcSSimon Schubert if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
26435796c8dcSSimon Schubert
26445796c8dcSSimon Schubert #ifdef WCHAR
26455796c8dcSSimon Schubert /* We assume a charset(_not) structure as a wchar_t array.
26465796c8dcSSimon Schubert charset[0] = (re_opcode_t) charset(_not)
26475796c8dcSSimon Schubert charset[1] = l (= length of char_classes)
26485796c8dcSSimon Schubert charset[2] = m (= length of collating_symbols)
26495796c8dcSSimon Schubert charset[3] = n (= length of equivalence_classes)
26505796c8dcSSimon Schubert charset[4] = o (= length of char_ranges)
26515796c8dcSSimon Schubert charset[5] = p (= length of chars)
26525796c8dcSSimon Schubert
26535796c8dcSSimon Schubert charset[6] = char_class (wctype_t)
26545796c8dcSSimon Schubert charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
26555796c8dcSSimon Schubert ...
26565796c8dcSSimon Schubert charset[l+5] = char_class (wctype_t)
26575796c8dcSSimon Schubert
26585796c8dcSSimon Schubert charset[l+6] = collating_symbol (wchar_t)
26595796c8dcSSimon Schubert ...
26605796c8dcSSimon Schubert charset[l+m+5] = collating_symbol (wchar_t)
26615796c8dcSSimon Schubert ifdef _LIBC we use the index in
26625796c8dcSSimon Schubert _NL_COLLATE_SYMB_EXTRAMB instead of
26635796c8dcSSimon Schubert wchar_t string.
26645796c8dcSSimon Schubert
26655796c8dcSSimon Schubert charset[l+m+6] = equivalence_classes (wchar_t)
26665796c8dcSSimon Schubert ...
26675796c8dcSSimon Schubert charset[l+m+n+5] = equivalence_classes (wchar_t)
26685796c8dcSSimon Schubert ifdef _LIBC we use the index in
26695796c8dcSSimon Schubert _NL_COLLATE_WEIGHT instead of
26705796c8dcSSimon Schubert wchar_t string.
26715796c8dcSSimon Schubert
26725796c8dcSSimon Schubert charset[l+m+n+6] = range_start
26735796c8dcSSimon Schubert charset[l+m+n+7] = range_end
26745796c8dcSSimon Schubert ...
26755796c8dcSSimon Schubert charset[l+m+n+2o+4] = range_start
26765796c8dcSSimon Schubert charset[l+m+n+2o+5] = range_end
26775796c8dcSSimon Schubert ifdef _LIBC we use the value looked up
26785796c8dcSSimon Schubert in _NL_COLLATE_COLLSEQ instead of
26795796c8dcSSimon Schubert wchar_t character.
26805796c8dcSSimon Schubert
26815796c8dcSSimon Schubert charset[l+m+n+2o+6] = char
26825796c8dcSSimon Schubert ...
26835796c8dcSSimon Schubert charset[l+m+n+2o+p+5] = char
26845796c8dcSSimon Schubert
26855796c8dcSSimon Schubert */
26865796c8dcSSimon Schubert
26875796c8dcSSimon Schubert /* We need at least 6 spaces: the opcode, the length of
26885796c8dcSSimon Schubert char_classes, the length of collating_symbols, the length of
26895796c8dcSSimon Schubert equivalence_classes, the length of char_ranges, the length of
26905796c8dcSSimon Schubert chars. */
26915796c8dcSSimon Schubert GET_BUFFER_SPACE (6);
26925796c8dcSSimon Schubert
26935796c8dcSSimon Schubert /* Save b as laststart. We also use laststart as the pointer
26945796c8dcSSimon Schubert to the first element of the charset here.
26955796c8dcSSimon Schubert In other words, laststart[i] indicates charset[i]. */
26965796c8dcSSimon Schubert laststart = b;
26975796c8dcSSimon Schubert
26985796c8dcSSimon Schubert /* We test `*p == '^' twice, instead of using an if
26995796c8dcSSimon Schubert statement, so we only need one BUF_PUSH. */
27005796c8dcSSimon Schubert BUF_PUSH (*p == '^' ? charset_not : charset);
27015796c8dcSSimon Schubert if (*p == '^')
27025796c8dcSSimon Schubert p++;
27035796c8dcSSimon Schubert
27045796c8dcSSimon Schubert /* Push the length of char_classes, the length of
27055796c8dcSSimon Schubert collating_symbols, the length of equivalence_classes, the
27065796c8dcSSimon Schubert length of char_ranges and the length of chars. */
27075796c8dcSSimon Schubert BUF_PUSH_3 (0, 0, 0);
27085796c8dcSSimon Schubert BUF_PUSH_2 (0, 0);
27095796c8dcSSimon Schubert
27105796c8dcSSimon Schubert /* Remember the first position in the bracket expression. */
27115796c8dcSSimon Schubert p1 = p;
27125796c8dcSSimon Schubert
27135796c8dcSSimon Schubert /* charset_not matches newline according to a syntax bit. */
27145796c8dcSSimon Schubert if ((re_opcode_t) b[-6] == charset_not
27155796c8dcSSimon Schubert && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
27165796c8dcSSimon Schubert {
27175796c8dcSSimon Schubert BUF_PUSH('\n');
27185796c8dcSSimon Schubert laststart[5]++; /* Update the length of characters */
27195796c8dcSSimon Schubert }
27205796c8dcSSimon Schubert
27215796c8dcSSimon Schubert /* Read in characters and ranges, setting map bits. */
27225796c8dcSSimon Schubert for (;;)
27235796c8dcSSimon Schubert {
27245796c8dcSSimon Schubert if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
27255796c8dcSSimon Schubert
27265796c8dcSSimon Schubert PATFETCH (c);
27275796c8dcSSimon Schubert
27285796c8dcSSimon Schubert /* \ might escape characters inside [...] and [^...]. */
27295796c8dcSSimon Schubert if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
27305796c8dcSSimon Schubert {
27315796c8dcSSimon Schubert if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
27325796c8dcSSimon Schubert
27335796c8dcSSimon Schubert PATFETCH (c1);
27345796c8dcSSimon Schubert BUF_PUSH(c1);
27355796c8dcSSimon Schubert laststart[5]++; /* Update the length of chars */
27365796c8dcSSimon Schubert range_start = c1;
27375796c8dcSSimon Schubert continue;
27385796c8dcSSimon Schubert }
27395796c8dcSSimon Schubert
27405796c8dcSSimon Schubert /* Could be the end of the bracket expression. If it's
27415796c8dcSSimon Schubert not (i.e., when the bracket expression is `[]' so
27425796c8dcSSimon Schubert far), the ']' character bit gets set way below. */
27435796c8dcSSimon Schubert if (c == ']' && p != p1 + 1)
27445796c8dcSSimon Schubert break;
27455796c8dcSSimon Schubert
27465796c8dcSSimon Schubert /* Look ahead to see if it's a range when the last thing
27475796c8dcSSimon Schubert was a character class. */
27485796c8dcSSimon Schubert if (had_char_class && c == '-' && *p != ']')
27495796c8dcSSimon Schubert FREE_STACK_RETURN (REG_ERANGE);
27505796c8dcSSimon Schubert
27515796c8dcSSimon Schubert /* Look ahead to see if it's a range when the last thing
27525796c8dcSSimon Schubert was a character: if this is a hyphen not at the
27535796c8dcSSimon Schubert beginning or the end of a list, then it's the range
27545796c8dcSSimon Schubert operator. */
27555796c8dcSSimon Schubert if (c == '-'
27565796c8dcSSimon Schubert && !(p - 2 >= pattern && p[-2] == '[')
27575796c8dcSSimon Schubert && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
27585796c8dcSSimon Schubert && *p != ']')
27595796c8dcSSimon Schubert {
27605796c8dcSSimon Schubert reg_errcode_t ret;
27615796c8dcSSimon Schubert /* Allocate the space for range_start and range_end. */
27625796c8dcSSimon Schubert GET_BUFFER_SPACE (2);
27635796c8dcSSimon Schubert /* Update the pointer to indicate end of buffer. */
27645796c8dcSSimon Schubert b += 2;
27655796c8dcSSimon Schubert ret = wcs_compile_range (range_start, &p, pend, translate,
27665796c8dcSSimon Schubert syntax, b, laststart);
27675796c8dcSSimon Schubert if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
27685796c8dcSSimon Schubert range_start = 0xffffffff;
27695796c8dcSSimon Schubert }
27705796c8dcSSimon Schubert else if (p[0] == '-' && p[1] != ']')
27715796c8dcSSimon Schubert { /* This handles ranges made up of characters only. */
27725796c8dcSSimon Schubert reg_errcode_t ret;
27735796c8dcSSimon Schubert
27745796c8dcSSimon Schubert /* Move past the `-'. */
27755796c8dcSSimon Schubert PATFETCH (c1);
27765796c8dcSSimon Schubert /* Allocate the space for range_start and range_end. */
27775796c8dcSSimon Schubert GET_BUFFER_SPACE (2);
27785796c8dcSSimon Schubert /* Update the pointer to indicate end of buffer. */
27795796c8dcSSimon Schubert b += 2;
27805796c8dcSSimon Schubert ret = wcs_compile_range (c, &p, pend, translate, syntax, b,
27815796c8dcSSimon Schubert laststart);
27825796c8dcSSimon Schubert if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
27835796c8dcSSimon Schubert range_start = 0xffffffff;
27845796c8dcSSimon Schubert }
27855796c8dcSSimon Schubert
27865796c8dcSSimon Schubert /* See if we're at the beginning of a possible character
27875796c8dcSSimon Schubert class. */
27885796c8dcSSimon Schubert else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
27895796c8dcSSimon Schubert { /* Leave room for the null. */
27905796c8dcSSimon Schubert char str[CHAR_CLASS_MAX_LENGTH + 1];
27915796c8dcSSimon Schubert
27925796c8dcSSimon Schubert PATFETCH (c);
27935796c8dcSSimon Schubert c1 = 0;
27945796c8dcSSimon Schubert
27955796c8dcSSimon Schubert /* If pattern is `[[:'. */
27965796c8dcSSimon Schubert if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
27975796c8dcSSimon Schubert
27985796c8dcSSimon Schubert for (;;)
27995796c8dcSSimon Schubert {
28005796c8dcSSimon Schubert PATFETCH (c);
28015796c8dcSSimon Schubert if ((c == ':' && *p == ']') || p == pend)
28025796c8dcSSimon Schubert break;
28035796c8dcSSimon Schubert if (c1 < CHAR_CLASS_MAX_LENGTH)
28045796c8dcSSimon Schubert str[c1++] = c;
28055796c8dcSSimon Schubert else
28065796c8dcSSimon Schubert /* This is in any case an invalid class name. */
28075796c8dcSSimon Schubert str[0] = '\0';
28085796c8dcSSimon Schubert }
28095796c8dcSSimon Schubert str[c1] = '\0';
28105796c8dcSSimon Schubert
28115796c8dcSSimon Schubert /* If isn't a word bracketed by `[:' and `:]':
28125796c8dcSSimon Schubert undo the ending character, the letters, and leave
28135796c8dcSSimon Schubert the leading `:' and `[' (but store them as character). */
28145796c8dcSSimon Schubert if (c == ':' && *p == ']')
28155796c8dcSSimon Schubert {
28165796c8dcSSimon Schubert wctype_t wt;
28175796c8dcSSimon Schubert uintptr_t alignedp;
28185796c8dcSSimon Schubert
28195796c8dcSSimon Schubert /* Query the character class as wctype_t. */
28205796c8dcSSimon Schubert wt = IS_CHAR_CLASS (str);
28215796c8dcSSimon Schubert if (wt == 0)
28225796c8dcSSimon Schubert FREE_STACK_RETURN (REG_ECTYPE);
28235796c8dcSSimon Schubert
28245796c8dcSSimon Schubert /* Throw away the ] at the end of the character
28255796c8dcSSimon Schubert class. */
28265796c8dcSSimon Schubert PATFETCH (c);
28275796c8dcSSimon Schubert
28285796c8dcSSimon Schubert if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
28295796c8dcSSimon Schubert
28305796c8dcSSimon Schubert /* Allocate the space for character class. */
28315796c8dcSSimon Schubert GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
28325796c8dcSSimon Schubert /* Update the pointer to indicate end of buffer. */
28335796c8dcSSimon Schubert b += CHAR_CLASS_SIZE;
28345796c8dcSSimon Schubert /* Move data which follow character classes
28355796c8dcSSimon Schubert not to violate the data. */
28365796c8dcSSimon Schubert insert_space(CHAR_CLASS_SIZE,
28375796c8dcSSimon Schubert laststart + 6 + laststart[1],
28385796c8dcSSimon Schubert b - 1);
28395796c8dcSSimon Schubert alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
28405796c8dcSSimon Schubert + __alignof__(wctype_t) - 1)
28415796c8dcSSimon Schubert & ~(uintptr_t)(__alignof__(wctype_t) - 1);
28425796c8dcSSimon Schubert /* Store the character class. */
28435796c8dcSSimon Schubert *((wctype_t*)alignedp) = wt;
28445796c8dcSSimon Schubert /* Update length of char_classes */
28455796c8dcSSimon Schubert laststart[1] += CHAR_CLASS_SIZE;
28465796c8dcSSimon Schubert
28475796c8dcSSimon Schubert had_char_class = true;
28485796c8dcSSimon Schubert }
28495796c8dcSSimon Schubert else
28505796c8dcSSimon Schubert {
28515796c8dcSSimon Schubert c1++;
28525796c8dcSSimon Schubert while (c1--)
28535796c8dcSSimon Schubert PATUNFETCH;
28545796c8dcSSimon Schubert BUF_PUSH ('[');
28555796c8dcSSimon Schubert BUF_PUSH (':');
28565796c8dcSSimon Schubert laststart[5] += 2; /* Update the length of characters */
28575796c8dcSSimon Schubert range_start = ':';
28585796c8dcSSimon Schubert had_char_class = false;
28595796c8dcSSimon Schubert }
28605796c8dcSSimon Schubert }
28615796c8dcSSimon Schubert else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
28625796c8dcSSimon Schubert || *p == '.'))
28635796c8dcSSimon Schubert {
28645796c8dcSSimon Schubert CHAR_T str[128]; /* Should be large enough. */
28655796c8dcSSimon Schubert CHAR_T delim = *p; /* '=' or '.' */
28665796c8dcSSimon Schubert # ifdef _LIBC
28675796c8dcSSimon Schubert uint32_t nrules =
28685796c8dcSSimon Schubert _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
28695796c8dcSSimon Schubert # endif
28705796c8dcSSimon Schubert PATFETCH (c);
28715796c8dcSSimon Schubert c1 = 0;
28725796c8dcSSimon Schubert
28735796c8dcSSimon Schubert /* If pattern is `[[=' or '[[.'. */
28745796c8dcSSimon Schubert if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
28755796c8dcSSimon Schubert
28765796c8dcSSimon Schubert for (;;)
28775796c8dcSSimon Schubert {
28785796c8dcSSimon Schubert PATFETCH (c);
28795796c8dcSSimon Schubert if ((c == delim && *p == ']') || p == pend)
28805796c8dcSSimon Schubert break;
28815796c8dcSSimon Schubert if (c1 < sizeof (str) - 1)
28825796c8dcSSimon Schubert str[c1++] = c;
28835796c8dcSSimon Schubert else
28845796c8dcSSimon Schubert /* This is in any case an invalid class name. */
28855796c8dcSSimon Schubert str[0] = '\0';
28865796c8dcSSimon Schubert }
28875796c8dcSSimon Schubert str[c1] = '\0';
28885796c8dcSSimon Schubert
28895796c8dcSSimon Schubert if (c == delim && *p == ']' && str[0] != '\0')
28905796c8dcSSimon Schubert {
28915796c8dcSSimon Schubert unsigned int i, offset;
28925796c8dcSSimon Schubert /* If we have no collation data we use the default
28935796c8dcSSimon Schubert collation in which each character is in a class
28945796c8dcSSimon Schubert by itself. It also means that ASCII is the
28955796c8dcSSimon Schubert character set and therefore we cannot have character
28965796c8dcSSimon Schubert with more than one byte in the multibyte
28975796c8dcSSimon Schubert representation. */
28985796c8dcSSimon Schubert
28995796c8dcSSimon Schubert /* If not defined _LIBC, we push the name and
29005796c8dcSSimon Schubert `\0' for the sake of matching performance. */
29015796c8dcSSimon Schubert int datasize = c1 + 1;
29025796c8dcSSimon Schubert
29035796c8dcSSimon Schubert # ifdef _LIBC
29045796c8dcSSimon Schubert int32_t idx = 0;
29055796c8dcSSimon Schubert if (nrules == 0)
29065796c8dcSSimon Schubert # endif
29075796c8dcSSimon Schubert {
29085796c8dcSSimon Schubert if (c1 != 1)
29095796c8dcSSimon Schubert FREE_STACK_RETURN (REG_ECOLLATE);
29105796c8dcSSimon Schubert }
29115796c8dcSSimon Schubert # ifdef _LIBC
29125796c8dcSSimon Schubert else
29135796c8dcSSimon Schubert {
29145796c8dcSSimon Schubert const int32_t *table;
29155796c8dcSSimon Schubert const int32_t *weights;
29165796c8dcSSimon Schubert const int32_t *extra;
29175796c8dcSSimon Schubert const int32_t *indirect;
29185796c8dcSSimon Schubert wint_t *cp;
29195796c8dcSSimon Schubert
29205796c8dcSSimon Schubert /* This #include defines a local function! */
29215796c8dcSSimon Schubert # include <locale/weightwc.h>
29225796c8dcSSimon Schubert
29235796c8dcSSimon Schubert if(delim == '=')
29245796c8dcSSimon Schubert {
29255796c8dcSSimon Schubert /* We push the index for equivalence class. */
29265796c8dcSSimon Schubert cp = (wint_t*)str;
29275796c8dcSSimon Schubert
29285796c8dcSSimon Schubert table = (const int32_t *)
29295796c8dcSSimon Schubert _NL_CURRENT (LC_COLLATE,
29305796c8dcSSimon Schubert _NL_COLLATE_TABLEWC);
29315796c8dcSSimon Schubert weights = (const int32_t *)
29325796c8dcSSimon Schubert _NL_CURRENT (LC_COLLATE,
29335796c8dcSSimon Schubert _NL_COLLATE_WEIGHTWC);
29345796c8dcSSimon Schubert extra = (const int32_t *)
29355796c8dcSSimon Schubert _NL_CURRENT (LC_COLLATE,
29365796c8dcSSimon Schubert _NL_COLLATE_EXTRAWC);
29375796c8dcSSimon Schubert indirect = (const int32_t *)
29385796c8dcSSimon Schubert _NL_CURRENT (LC_COLLATE,
29395796c8dcSSimon Schubert _NL_COLLATE_INDIRECTWC);
29405796c8dcSSimon Schubert
29415796c8dcSSimon Schubert idx = findidx ((const wint_t**)&cp);
29425796c8dcSSimon Schubert if (idx == 0 || cp < (wint_t*) str + c1)
29435796c8dcSSimon Schubert /* This is no valid character. */
29445796c8dcSSimon Schubert FREE_STACK_RETURN (REG_ECOLLATE);
29455796c8dcSSimon Schubert
29465796c8dcSSimon Schubert str[0] = (wchar_t)idx;
29475796c8dcSSimon Schubert }
29485796c8dcSSimon Schubert else /* delim == '.' */
29495796c8dcSSimon Schubert {
29505796c8dcSSimon Schubert /* We push collation sequence value
29515796c8dcSSimon Schubert for collating symbol. */
29525796c8dcSSimon Schubert int32_t table_size;
29535796c8dcSSimon Schubert const int32_t *symb_table;
29545796c8dcSSimon Schubert const unsigned char *extra;
29555796c8dcSSimon Schubert int32_t idx;
29565796c8dcSSimon Schubert int32_t elem;
29575796c8dcSSimon Schubert int32_t second;
29585796c8dcSSimon Schubert int32_t hash;
29595796c8dcSSimon Schubert char char_str[c1];
29605796c8dcSSimon Schubert
29615796c8dcSSimon Schubert /* We have to convert the name to a single-byte
29625796c8dcSSimon Schubert string. This is possible since the names
29635796c8dcSSimon Schubert consist of ASCII characters and the internal
29645796c8dcSSimon Schubert representation is UCS4. */
29655796c8dcSSimon Schubert for (i = 0; i < c1; ++i)
29665796c8dcSSimon Schubert char_str[i] = str[i];
29675796c8dcSSimon Schubert
29685796c8dcSSimon Schubert table_size =
29695796c8dcSSimon Schubert _NL_CURRENT_WORD (LC_COLLATE,
29705796c8dcSSimon Schubert _NL_COLLATE_SYMB_HASH_SIZEMB);
29715796c8dcSSimon Schubert symb_table = (const int32_t *)
29725796c8dcSSimon Schubert _NL_CURRENT (LC_COLLATE,
29735796c8dcSSimon Schubert _NL_COLLATE_SYMB_TABLEMB);
29745796c8dcSSimon Schubert extra = (const unsigned char *)
29755796c8dcSSimon Schubert _NL_CURRENT (LC_COLLATE,
29765796c8dcSSimon Schubert _NL_COLLATE_SYMB_EXTRAMB);
29775796c8dcSSimon Schubert
29785796c8dcSSimon Schubert /* Locate the character in the hashing table. */
29795796c8dcSSimon Schubert hash = elem_hash (char_str, c1);
29805796c8dcSSimon Schubert
29815796c8dcSSimon Schubert idx = 0;
29825796c8dcSSimon Schubert elem = hash % table_size;
29835796c8dcSSimon Schubert second = hash % (table_size - 2);
29845796c8dcSSimon Schubert while (symb_table[2 * elem] != 0)
29855796c8dcSSimon Schubert {
29865796c8dcSSimon Schubert /* First compare the hashing value. */
29875796c8dcSSimon Schubert if (symb_table[2 * elem] == hash
29885796c8dcSSimon Schubert && c1 == extra[symb_table[2 * elem + 1]]
29895796c8dcSSimon Schubert && memcmp (char_str,
29905796c8dcSSimon Schubert &extra[symb_table[2 * elem + 1]
29915796c8dcSSimon Schubert + 1], c1) == 0)
29925796c8dcSSimon Schubert {
29935796c8dcSSimon Schubert /* Yep, this is the entry. */
29945796c8dcSSimon Schubert idx = symb_table[2 * elem + 1];
29955796c8dcSSimon Schubert idx += 1 + extra[idx];
29965796c8dcSSimon Schubert break;
29975796c8dcSSimon Schubert }
29985796c8dcSSimon Schubert
29995796c8dcSSimon Schubert /* Next entry. */
30005796c8dcSSimon Schubert elem += second;
30015796c8dcSSimon Schubert }
30025796c8dcSSimon Schubert
30035796c8dcSSimon Schubert if (symb_table[2 * elem] != 0)
30045796c8dcSSimon Schubert {
30055796c8dcSSimon Schubert /* Compute the index of the byte sequence
30065796c8dcSSimon Schubert in the table. */
30075796c8dcSSimon Schubert idx += 1 + extra[idx];
30085796c8dcSSimon Schubert /* Adjust for the alignment. */
30095796c8dcSSimon Schubert idx = (idx + 3) & ~3;
30105796c8dcSSimon Schubert
30115796c8dcSSimon Schubert str[0] = (wchar_t) idx + 4;
30125796c8dcSSimon Schubert }
30135796c8dcSSimon Schubert else if (symb_table[2 * elem] == 0 && c1 == 1)
30145796c8dcSSimon Schubert {
30155796c8dcSSimon Schubert /* No valid character. Match it as a
30165796c8dcSSimon Schubert single byte character. */
30175796c8dcSSimon Schubert had_char_class = false;
30185796c8dcSSimon Schubert BUF_PUSH(str[0]);
30195796c8dcSSimon Schubert /* Update the length of characters */
30205796c8dcSSimon Schubert laststart[5]++;
30215796c8dcSSimon Schubert range_start = str[0];
30225796c8dcSSimon Schubert
30235796c8dcSSimon Schubert /* Throw away the ] at the end of the
30245796c8dcSSimon Schubert collating symbol. */
30255796c8dcSSimon Schubert PATFETCH (c);
30265796c8dcSSimon Schubert /* exit from the switch block. */
30275796c8dcSSimon Schubert continue;
30285796c8dcSSimon Schubert }
30295796c8dcSSimon Schubert else
30305796c8dcSSimon Schubert FREE_STACK_RETURN (REG_ECOLLATE);
30315796c8dcSSimon Schubert }
30325796c8dcSSimon Schubert datasize = 1;
30335796c8dcSSimon Schubert }
30345796c8dcSSimon Schubert # endif
30355796c8dcSSimon Schubert /* Throw away the ] at the end of the equivalence
30365796c8dcSSimon Schubert class (or collating symbol). */
30375796c8dcSSimon Schubert PATFETCH (c);
30385796c8dcSSimon Schubert
30395796c8dcSSimon Schubert /* Allocate the space for the equivalence class
30405796c8dcSSimon Schubert (or collating symbol) (and '\0' if needed). */
30415796c8dcSSimon Schubert GET_BUFFER_SPACE(datasize);
30425796c8dcSSimon Schubert /* Update the pointer to indicate end of buffer. */
30435796c8dcSSimon Schubert b += datasize;
30445796c8dcSSimon Schubert
30455796c8dcSSimon Schubert if (delim == '=')
30465796c8dcSSimon Schubert { /* equivalence class */
30475796c8dcSSimon Schubert /* Calculate the offset of char_ranges,
30485796c8dcSSimon Schubert which is next to equivalence_classes. */
30495796c8dcSSimon Schubert offset = laststart[1] + laststart[2]
30505796c8dcSSimon Schubert + laststart[3] +6;
30515796c8dcSSimon Schubert /* Insert space. */
30525796c8dcSSimon Schubert insert_space(datasize, laststart + offset, b - 1);
30535796c8dcSSimon Schubert
30545796c8dcSSimon Schubert /* Write the equivalence_class and \0. */
30555796c8dcSSimon Schubert for (i = 0 ; i < datasize ; i++)
30565796c8dcSSimon Schubert laststart[offset + i] = str[i];
30575796c8dcSSimon Schubert
30585796c8dcSSimon Schubert /* Update the length of equivalence_classes. */
30595796c8dcSSimon Schubert laststart[3] += datasize;
30605796c8dcSSimon Schubert had_char_class = true;
30615796c8dcSSimon Schubert }
30625796c8dcSSimon Schubert else /* delim == '.' */
30635796c8dcSSimon Schubert { /* collating symbol */
30645796c8dcSSimon Schubert /* Calculate the offset of the equivalence_classes,
30655796c8dcSSimon Schubert which is next to collating_symbols. */
30665796c8dcSSimon Schubert offset = laststart[1] + laststart[2] + 6;
30675796c8dcSSimon Schubert /* Insert space and write the collating_symbol
30685796c8dcSSimon Schubert and \0. */
30695796c8dcSSimon Schubert insert_space(datasize, laststart + offset, b-1);
30705796c8dcSSimon Schubert for (i = 0 ; i < datasize ; i++)
30715796c8dcSSimon Schubert laststart[offset + i] = str[i];
30725796c8dcSSimon Schubert
30735796c8dcSSimon Schubert /* In re_match_2_internal if range_start < -1, we
30745796c8dcSSimon Schubert assume -range_start is the offset of the
30755796c8dcSSimon Schubert collating symbol which is specified as
30765796c8dcSSimon Schubert the character of the range start. So we assign
30775796c8dcSSimon Schubert -(laststart[1] + laststart[2] + 6) to
30785796c8dcSSimon Schubert range_start. */
30795796c8dcSSimon Schubert range_start = -(laststart[1] + laststart[2] + 6);
30805796c8dcSSimon Schubert /* Update the length of collating_symbol. */
30815796c8dcSSimon Schubert laststart[2] += datasize;
30825796c8dcSSimon Schubert had_char_class = false;
30835796c8dcSSimon Schubert }
30845796c8dcSSimon Schubert }
30855796c8dcSSimon Schubert else
30865796c8dcSSimon Schubert {
30875796c8dcSSimon Schubert c1++;
30885796c8dcSSimon Schubert while (c1--)
30895796c8dcSSimon Schubert PATUNFETCH;
30905796c8dcSSimon Schubert BUF_PUSH ('[');
30915796c8dcSSimon Schubert BUF_PUSH (delim);
30925796c8dcSSimon Schubert laststart[5] += 2; /* Update the length of characters */
30935796c8dcSSimon Schubert range_start = delim;
30945796c8dcSSimon Schubert had_char_class = false;
30955796c8dcSSimon Schubert }
30965796c8dcSSimon Schubert }
30975796c8dcSSimon Schubert else
30985796c8dcSSimon Schubert {
30995796c8dcSSimon Schubert had_char_class = false;
31005796c8dcSSimon Schubert BUF_PUSH(c);
31015796c8dcSSimon Schubert laststart[5]++; /* Update the length of characters */
31025796c8dcSSimon Schubert range_start = c;
31035796c8dcSSimon Schubert }
31045796c8dcSSimon Schubert }
31055796c8dcSSimon Schubert
31065796c8dcSSimon Schubert #else /* BYTE */
31075796c8dcSSimon Schubert /* Ensure that we have enough space to push a charset: the
31085796c8dcSSimon Schubert opcode, the length count, and the bitset; 34 bytes in all. */
31095796c8dcSSimon Schubert GET_BUFFER_SPACE (34);
31105796c8dcSSimon Schubert
31115796c8dcSSimon Schubert laststart = b;
31125796c8dcSSimon Schubert
31135796c8dcSSimon Schubert /* We test `*p == '^' twice, instead of using an if
31145796c8dcSSimon Schubert statement, so we only need one BUF_PUSH. */
31155796c8dcSSimon Schubert BUF_PUSH (*p == '^' ? charset_not : charset);
31165796c8dcSSimon Schubert if (*p == '^')
31175796c8dcSSimon Schubert p++;
31185796c8dcSSimon Schubert
31195796c8dcSSimon Schubert /* Remember the first position in the bracket expression. */
31205796c8dcSSimon Schubert p1 = p;
31215796c8dcSSimon Schubert
31225796c8dcSSimon Schubert /* Push the number of bytes in the bitmap. */
31235796c8dcSSimon Schubert BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
31245796c8dcSSimon Schubert
31255796c8dcSSimon Schubert /* Clear the whole map. */
31265796c8dcSSimon Schubert bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
31275796c8dcSSimon Schubert
31285796c8dcSSimon Schubert /* charset_not matches newline according to a syntax bit. */
31295796c8dcSSimon Schubert if ((re_opcode_t) b[-2] == charset_not
31305796c8dcSSimon Schubert && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
31315796c8dcSSimon Schubert SET_LIST_BIT ('\n');
31325796c8dcSSimon Schubert
31335796c8dcSSimon Schubert /* Read in characters and ranges, setting map bits. */
31345796c8dcSSimon Schubert for (;;)
31355796c8dcSSimon Schubert {
31365796c8dcSSimon Schubert if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
31375796c8dcSSimon Schubert
31385796c8dcSSimon Schubert PATFETCH (c);
31395796c8dcSSimon Schubert
31405796c8dcSSimon Schubert /* \ might escape characters inside [...] and [^...]. */
31415796c8dcSSimon Schubert if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
31425796c8dcSSimon Schubert {
31435796c8dcSSimon Schubert if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
31445796c8dcSSimon Schubert
31455796c8dcSSimon Schubert PATFETCH (c1);
31465796c8dcSSimon Schubert SET_LIST_BIT (c1);
31475796c8dcSSimon Schubert range_start = c1;
31485796c8dcSSimon Schubert continue;
31495796c8dcSSimon Schubert }
31505796c8dcSSimon Schubert
31515796c8dcSSimon Schubert /* Could be the end of the bracket expression. If it's
31525796c8dcSSimon Schubert not (i.e., when the bracket expression is `[]' so
31535796c8dcSSimon Schubert far), the ']' character bit gets set way below. */
31545796c8dcSSimon Schubert if (c == ']' && p != p1 + 1)
31555796c8dcSSimon Schubert break;
31565796c8dcSSimon Schubert
31575796c8dcSSimon Schubert /* Look ahead to see if it's a range when the last thing
31585796c8dcSSimon Schubert was a character class. */
31595796c8dcSSimon Schubert if (had_char_class && c == '-' && *p != ']')
31605796c8dcSSimon Schubert FREE_STACK_RETURN (REG_ERANGE);
31615796c8dcSSimon Schubert
31625796c8dcSSimon Schubert /* Look ahead to see if it's a range when the last thing
31635796c8dcSSimon Schubert was a character: if this is a hyphen not at the
31645796c8dcSSimon Schubert beginning or the end of a list, then it's the range
31655796c8dcSSimon Schubert operator. */
31665796c8dcSSimon Schubert if (c == '-'
31675796c8dcSSimon Schubert && !(p - 2 >= pattern && p[-2] == '[')
31685796c8dcSSimon Schubert && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
31695796c8dcSSimon Schubert && *p != ']')
31705796c8dcSSimon Schubert {
31715796c8dcSSimon Schubert reg_errcode_t ret
31725796c8dcSSimon Schubert = byte_compile_range (range_start, &p, pend, translate,
31735796c8dcSSimon Schubert syntax, b);
31745796c8dcSSimon Schubert if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
31755796c8dcSSimon Schubert range_start = 0xffffffff;
31765796c8dcSSimon Schubert }
31775796c8dcSSimon Schubert
31785796c8dcSSimon Schubert else if (p[0] == '-' && p[1] != ']')
31795796c8dcSSimon Schubert { /* This handles ranges made up of characters only. */
31805796c8dcSSimon Schubert reg_errcode_t ret;
31815796c8dcSSimon Schubert
31825796c8dcSSimon Schubert /* Move past the `-'. */
31835796c8dcSSimon Schubert PATFETCH (c1);
31845796c8dcSSimon Schubert
31855796c8dcSSimon Schubert ret = byte_compile_range (c, &p, pend, translate, syntax, b);
31865796c8dcSSimon Schubert if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
31875796c8dcSSimon Schubert range_start = 0xffffffff;
31885796c8dcSSimon Schubert }
31895796c8dcSSimon Schubert
31905796c8dcSSimon Schubert /* See if we're at the beginning of a possible character
31915796c8dcSSimon Schubert class. */
31925796c8dcSSimon Schubert
31935796c8dcSSimon Schubert else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
31945796c8dcSSimon Schubert { /* Leave room for the null. */
31955796c8dcSSimon Schubert char str[CHAR_CLASS_MAX_LENGTH + 1];
31965796c8dcSSimon Schubert
31975796c8dcSSimon Schubert PATFETCH (c);
31985796c8dcSSimon Schubert c1 = 0;
31995796c8dcSSimon Schubert
32005796c8dcSSimon Schubert /* If pattern is `[[:'. */
32015796c8dcSSimon Schubert if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
32025796c8dcSSimon Schubert
32035796c8dcSSimon Schubert for (;;)
32045796c8dcSSimon Schubert {
32055796c8dcSSimon Schubert PATFETCH (c);
32065796c8dcSSimon Schubert if ((c == ':' && *p == ']') || p == pend)
32075796c8dcSSimon Schubert break;
32085796c8dcSSimon Schubert if (c1 < CHAR_CLASS_MAX_LENGTH)
32095796c8dcSSimon Schubert str[c1++] = c;
32105796c8dcSSimon Schubert else
32115796c8dcSSimon Schubert /* This is in any case an invalid class name. */
32125796c8dcSSimon Schubert str[0] = '\0';
32135796c8dcSSimon Schubert }
32145796c8dcSSimon Schubert str[c1] = '\0';
32155796c8dcSSimon Schubert
32165796c8dcSSimon Schubert /* If isn't a word bracketed by `[:' and `:]':
32175796c8dcSSimon Schubert undo the ending character, the letters, and leave
32185796c8dcSSimon Schubert the leading `:' and `[' (but set bits for them). */
32195796c8dcSSimon Schubert if (c == ':' && *p == ']')
32205796c8dcSSimon Schubert {
32215796c8dcSSimon Schubert # if defined _LIBC || WIDE_CHAR_SUPPORT
32225796c8dcSSimon Schubert boolean is_lower = STREQ (str, "lower");
32235796c8dcSSimon Schubert boolean is_upper = STREQ (str, "upper");
32245796c8dcSSimon Schubert wctype_t wt;
32255796c8dcSSimon Schubert int ch;
32265796c8dcSSimon Schubert
32275796c8dcSSimon Schubert wt = IS_CHAR_CLASS (str);
32285796c8dcSSimon Schubert if (wt == 0)
32295796c8dcSSimon Schubert FREE_STACK_RETURN (REG_ECTYPE);
32305796c8dcSSimon Schubert
32315796c8dcSSimon Schubert /* Throw away the ] at the end of the character
32325796c8dcSSimon Schubert class. */
32335796c8dcSSimon Schubert PATFETCH (c);
32345796c8dcSSimon Schubert
32355796c8dcSSimon Schubert if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
32365796c8dcSSimon Schubert
32375796c8dcSSimon Schubert for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
32385796c8dcSSimon Schubert {
32395796c8dcSSimon Schubert # ifdef _LIBC
32405796c8dcSSimon Schubert if (__iswctype (__btowc (ch), wt))
32415796c8dcSSimon Schubert SET_LIST_BIT (ch);
32425796c8dcSSimon Schubert # else
32435796c8dcSSimon Schubert if (iswctype (btowc (ch), wt))
32445796c8dcSSimon Schubert SET_LIST_BIT (ch);
32455796c8dcSSimon Schubert # endif
32465796c8dcSSimon Schubert
32475796c8dcSSimon Schubert if (translate && (is_upper || is_lower)
32485796c8dcSSimon Schubert && (ISUPPER (ch) || ISLOWER (ch)))
32495796c8dcSSimon Schubert SET_LIST_BIT (ch);
32505796c8dcSSimon Schubert }
32515796c8dcSSimon Schubert
32525796c8dcSSimon Schubert had_char_class = true;
32535796c8dcSSimon Schubert # else
32545796c8dcSSimon Schubert int ch;
32555796c8dcSSimon Schubert boolean is_alnum = STREQ (str, "alnum");
32565796c8dcSSimon Schubert boolean is_alpha = STREQ (str, "alpha");
32575796c8dcSSimon Schubert boolean is_blank = STREQ (str, "blank");
32585796c8dcSSimon Schubert boolean is_cntrl = STREQ (str, "cntrl");
32595796c8dcSSimon Schubert boolean is_digit = STREQ (str, "digit");
32605796c8dcSSimon Schubert boolean is_graph = STREQ (str, "graph");
32615796c8dcSSimon Schubert boolean is_lower = STREQ (str, "lower");
32625796c8dcSSimon Schubert boolean is_print = STREQ (str, "print");
32635796c8dcSSimon Schubert boolean is_punct = STREQ (str, "punct");
32645796c8dcSSimon Schubert boolean is_space = STREQ (str, "space");
32655796c8dcSSimon Schubert boolean is_upper = STREQ (str, "upper");
32665796c8dcSSimon Schubert boolean is_xdigit = STREQ (str, "xdigit");
32675796c8dcSSimon Schubert
32685796c8dcSSimon Schubert if (!IS_CHAR_CLASS (str))
32695796c8dcSSimon Schubert FREE_STACK_RETURN (REG_ECTYPE);
32705796c8dcSSimon Schubert
32715796c8dcSSimon Schubert /* Throw away the ] at the end of the character
32725796c8dcSSimon Schubert class. */
32735796c8dcSSimon Schubert PATFETCH (c);
32745796c8dcSSimon Schubert
32755796c8dcSSimon Schubert if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
32765796c8dcSSimon Schubert
32775796c8dcSSimon Schubert for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
32785796c8dcSSimon Schubert {
32795796c8dcSSimon Schubert /* This was split into 3 if's to
32805796c8dcSSimon Schubert avoid an arbitrary limit in some compiler. */
32815796c8dcSSimon Schubert if ( (is_alnum && ISALNUM (ch))
32825796c8dcSSimon Schubert || (is_alpha && ISALPHA (ch))
32835796c8dcSSimon Schubert || (is_blank && ISBLANK (ch))
32845796c8dcSSimon Schubert || (is_cntrl && ISCNTRL (ch)))
32855796c8dcSSimon Schubert SET_LIST_BIT (ch);
32865796c8dcSSimon Schubert if ( (is_digit && ISDIGIT (ch))
32875796c8dcSSimon Schubert || (is_graph && ISGRAPH (ch))
32885796c8dcSSimon Schubert || (is_lower && ISLOWER (ch))
32895796c8dcSSimon Schubert || (is_print && ISPRINT (ch)))
32905796c8dcSSimon Schubert SET_LIST_BIT (ch);
32915796c8dcSSimon Schubert if ( (is_punct && ISPUNCT (ch))
32925796c8dcSSimon Schubert || (is_space && ISSPACE (ch))
32935796c8dcSSimon Schubert || (is_upper && ISUPPER (ch))
32945796c8dcSSimon Schubert || (is_xdigit && ISXDIGIT (ch)))
32955796c8dcSSimon Schubert SET_LIST_BIT (ch);
32965796c8dcSSimon Schubert if ( translate && (is_upper || is_lower)
32975796c8dcSSimon Schubert && (ISUPPER (ch) || ISLOWER (ch)))
32985796c8dcSSimon Schubert SET_LIST_BIT (ch);
32995796c8dcSSimon Schubert }
33005796c8dcSSimon Schubert had_char_class = true;
33015796c8dcSSimon Schubert # endif /* libc || wctype.h */
33025796c8dcSSimon Schubert }
33035796c8dcSSimon Schubert else
33045796c8dcSSimon Schubert {
33055796c8dcSSimon Schubert c1++;
33065796c8dcSSimon Schubert while (c1--)
33075796c8dcSSimon Schubert PATUNFETCH;
33085796c8dcSSimon Schubert SET_LIST_BIT ('[');
33095796c8dcSSimon Schubert SET_LIST_BIT (':');
33105796c8dcSSimon Schubert range_start = ':';
33115796c8dcSSimon Schubert had_char_class = false;
33125796c8dcSSimon Schubert }
33135796c8dcSSimon Schubert }
33145796c8dcSSimon Schubert else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
33155796c8dcSSimon Schubert {
33165796c8dcSSimon Schubert unsigned char str[MB_LEN_MAX + 1];
33175796c8dcSSimon Schubert # ifdef _LIBC
33185796c8dcSSimon Schubert uint32_t nrules =
33195796c8dcSSimon Schubert _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
33205796c8dcSSimon Schubert # endif
33215796c8dcSSimon Schubert
33225796c8dcSSimon Schubert PATFETCH (c);
33235796c8dcSSimon Schubert c1 = 0;
33245796c8dcSSimon Schubert
33255796c8dcSSimon Schubert /* If pattern is `[[='. */
33265796c8dcSSimon Schubert if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
33275796c8dcSSimon Schubert
33285796c8dcSSimon Schubert for (;;)
33295796c8dcSSimon Schubert {
33305796c8dcSSimon Schubert PATFETCH (c);
33315796c8dcSSimon Schubert if ((c == '=' && *p == ']') || p == pend)
33325796c8dcSSimon Schubert break;
33335796c8dcSSimon Schubert if (c1 < MB_LEN_MAX)
33345796c8dcSSimon Schubert str[c1++] = c;
33355796c8dcSSimon Schubert else
33365796c8dcSSimon Schubert /* This is in any case an invalid class name. */
33375796c8dcSSimon Schubert str[0] = '\0';
33385796c8dcSSimon Schubert }
33395796c8dcSSimon Schubert str[c1] = '\0';
33405796c8dcSSimon Schubert
33415796c8dcSSimon Schubert if (c == '=' && *p == ']' && str[0] != '\0')
33425796c8dcSSimon Schubert {
33435796c8dcSSimon Schubert /* If we have no collation data we use the default
33445796c8dcSSimon Schubert collation in which each character is in a class
33455796c8dcSSimon Schubert by itself. It also means that ASCII is the
33465796c8dcSSimon Schubert character set and therefore we cannot have character
33475796c8dcSSimon Schubert with more than one byte in the multibyte
33485796c8dcSSimon Schubert representation. */
33495796c8dcSSimon Schubert # ifdef _LIBC
33505796c8dcSSimon Schubert if (nrules == 0)
33515796c8dcSSimon Schubert # endif
33525796c8dcSSimon Schubert {
33535796c8dcSSimon Schubert if (c1 != 1)
33545796c8dcSSimon Schubert FREE_STACK_RETURN (REG_ECOLLATE);
33555796c8dcSSimon Schubert
33565796c8dcSSimon Schubert /* Throw away the ] at the end of the equivalence
33575796c8dcSSimon Schubert class. */
33585796c8dcSSimon Schubert PATFETCH (c);
33595796c8dcSSimon Schubert
33605796c8dcSSimon Schubert /* Set the bit for the character. */
33615796c8dcSSimon Schubert SET_LIST_BIT (str[0]);
33625796c8dcSSimon Schubert }
33635796c8dcSSimon Schubert # ifdef _LIBC
33645796c8dcSSimon Schubert else
33655796c8dcSSimon Schubert {
33665796c8dcSSimon Schubert /* Try to match the byte sequence in `str' against
33675796c8dcSSimon Schubert those known to the collate implementation.
33685796c8dcSSimon Schubert First find out whether the bytes in `str' are
33695796c8dcSSimon Schubert actually from exactly one character. */
33705796c8dcSSimon Schubert const int32_t *table;
33715796c8dcSSimon Schubert const unsigned char *weights;
33725796c8dcSSimon Schubert const unsigned char *extra;
33735796c8dcSSimon Schubert const int32_t *indirect;
33745796c8dcSSimon Schubert int32_t idx;
33755796c8dcSSimon Schubert const unsigned char *cp = str;
33765796c8dcSSimon Schubert int ch;
33775796c8dcSSimon Schubert
33785796c8dcSSimon Schubert /* This #include defines a local function! */
33795796c8dcSSimon Schubert # include <locale/weight.h>
33805796c8dcSSimon Schubert
33815796c8dcSSimon Schubert table = (const int32_t *)
33825796c8dcSSimon Schubert _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
33835796c8dcSSimon Schubert weights = (const unsigned char *)
33845796c8dcSSimon Schubert _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
33855796c8dcSSimon Schubert extra = (const unsigned char *)
33865796c8dcSSimon Schubert _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
33875796c8dcSSimon Schubert indirect = (const int32_t *)
33885796c8dcSSimon Schubert _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
33895796c8dcSSimon Schubert
33905796c8dcSSimon Schubert idx = findidx (&cp);
33915796c8dcSSimon Schubert if (idx == 0 || cp < str + c1)
33925796c8dcSSimon Schubert /* This is no valid character. */
33935796c8dcSSimon Schubert FREE_STACK_RETURN (REG_ECOLLATE);
33945796c8dcSSimon Schubert
33955796c8dcSSimon Schubert /* Throw away the ] at the end of the equivalence
33965796c8dcSSimon Schubert class. */
33975796c8dcSSimon Schubert PATFETCH (c);
33985796c8dcSSimon Schubert
33995796c8dcSSimon Schubert /* Now we have to go through the whole table
34005796c8dcSSimon Schubert and find all characters which have the same
34015796c8dcSSimon Schubert first level weight.
34025796c8dcSSimon Schubert
34035796c8dcSSimon Schubert XXX Note that this is not entirely correct.
34045796c8dcSSimon Schubert we would have to match multibyte sequences
34055796c8dcSSimon Schubert but this is not possible with the current
34065796c8dcSSimon Schubert implementation. */
34075796c8dcSSimon Schubert for (ch = 1; ch < 256; ++ch)
34085796c8dcSSimon Schubert /* XXX This test would have to be changed if we
34095796c8dcSSimon Schubert would allow matching multibyte sequences. */
34105796c8dcSSimon Schubert if (table[ch] > 0)
34115796c8dcSSimon Schubert {
34125796c8dcSSimon Schubert int32_t idx2 = table[ch];
34135796c8dcSSimon Schubert size_t len = weights[idx2];
34145796c8dcSSimon Schubert
34155796c8dcSSimon Schubert /* Test whether the lengths match. */
34165796c8dcSSimon Schubert if (weights[idx] == len)
34175796c8dcSSimon Schubert {
34185796c8dcSSimon Schubert /* They do. Now compare the bytes of
34195796c8dcSSimon Schubert the weight. */
34205796c8dcSSimon Schubert size_t cnt = 0;
34215796c8dcSSimon Schubert
34225796c8dcSSimon Schubert while (cnt < len
34235796c8dcSSimon Schubert && (weights[idx + 1 + cnt]
34245796c8dcSSimon Schubert == weights[idx2 + 1 + cnt]))
34255796c8dcSSimon Schubert ++cnt;
34265796c8dcSSimon Schubert
34275796c8dcSSimon Schubert if (cnt == len)
34285796c8dcSSimon Schubert /* They match. Mark the character as
34295796c8dcSSimon Schubert acceptable. */
34305796c8dcSSimon Schubert SET_LIST_BIT (ch);
34315796c8dcSSimon Schubert }
34325796c8dcSSimon Schubert }
34335796c8dcSSimon Schubert }
34345796c8dcSSimon Schubert # endif
34355796c8dcSSimon Schubert had_char_class = true;
34365796c8dcSSimon Schubert }
34375796c8dcSSimon Schubert else
34385796c8dcSSimon Schubert {
34395796c8dcSSimon Schubert c1++;
34405796c8dcSSimon Schubert while (c1--)
34415796c8dcSSimon Schubert PATUNFETCH;
34425796c8dcSSimon Schubert SET_LIST_BIT ('[');
34435796c8dcSSimon Schubert SET_LIST_BIT ('=');
34445796c8dcSSimon Schubert range_start = '=';
34455796c8dcSSimon Schubert had_char_class = false;
34465796c8dcSSimon Schubert }
34475796c8dcSSimon Schubert }
34485796c8dcSSimon Schubert else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
34495796c8dcSSimon Schubert {
34505796c8dcSSimon Schubert unsigned char str[128]; /* Should be large enough. */
34515796c8dcSSimon Schubert # ifdef _LIBC
34525796c8dcSSimon Schubert uint32_t nrules =
34535796c8dcSSimon Schubert _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
34545796c8dcSSimon Schubert # endif
34555796c8dcSSimon Schubert
34565796c8dcSSimon Schubert PATFETCH (c);
34575796c8dcSSimon Schubert c1 = 0;
34585796c8dcSSimon Schubert
34595796c8dcSSimon Schubert /* If pattern is `[[.'. */
34605796c8dcSSimon Schubert if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
34615796c8dcSSimon Schubert
34625796c8dcSSimon Schubert for (;;)
34635796c8dcSSimon Schubert {
34645796c8dcSSimon Schubert PATFETCH (c);
34655796c8dcSSimon Schubert if ((c == '.' && *p == ']') || p == pend)
34665796c8dcSSimon Schubert break;
34675796c8dcSSimon Schubert if (c1 < sizeof (str))
34685796c8dcSSimon Schubert str[c1++] = c;
34695796c8dcSSimon Schubert else
34705796c8dcSSimon Schubert /* This is in any case an invalid class name. */
34715796c8dcSSimon Schubert str[0] = '\0';
34725796c8dcSSimon Schubert }
34735796c8dcSSimon Schubert str[c1] = '\0';
34745796c8dcSSimon Schubert
34755796c8dcSSimon Schubert if (c == '.' && *p == ']' && str[0] != '\0')
34765796c8dcSSimon Schubert {
34775796c8dcSSimon Schubert /* If we have no collation data we use the default
34785796c8dcSSimon Schubert collation in which each character is the name
34795796c8dcSSimon Schubert for its own class which contains only the one
34805796c8dcSSimon Schubert character. It also means that ASCII is the
34815796c8dcSSimon Schubert character set and therefore we cannot have character
34825796c8dcSSimon Schubert with more than one byte in the multibyte
34835796c8dcSSimon Schubert representation. */
34845796c8dcSSimon Schubert # ifdef _LIBC
34855796c8dcSSimon Schubert if (nrules == 0)
34865796c8dcSSimon Schubert # endif
34875796c8dcSSimon Schubert {
34885796c8dcSSimon Schubert if (c1 != 1)
34895796c8dcSSimon Schubert FREE_STACK_RETURN (REG_ECOLLATE);
34905796c8dcSSimon Schubert
34915796c8dcSSimon Schubert /* Throw away the ] at the end of the collating
34925796c8dcSSimon Schubert symbol. */
34935796c8dcSSimon Schubert PATFETCH (c);
34945796c8dcSSimon Schubert
34955796c8dcSSimon Schubert /* Set the bit for the character. */
34965796c8dcSSimon Schubert SET_LIST_BIT (str[0]);
34975796c8dcSSimon Schubert range_start = ((const unsigned char *) str)[0];
34985796c8dcSSimon Schubert }
34995796c8dcSSimon Schubert # ifdef _LIBC
35005796c8dcSSimon Schubert else
35015796c8dcSSimon Schubert {
35025796c8dcSSimon Schubert /* Try to match the byte sequence in `str' against
35035796c8dcSSimon Schubert those known to the collate implementation.
35045796c8dcSSimon Schubert First find out whether the bytes in `str' are
35055796c8dcSSimon Schubert actually from exactly one character. */
35065796c8dcSSimon Schubert int32_t table_size;
35075796c8dcSSimon Schubert const int32_t *symb_table;
35085796c8dcSSimon Schubert const unsigned char *extra;
35095796c8dcSSimon Schubert int32_t idx;
35105796c8dcSSimon Schubert int32_t elem;
35115796c8dcSSimon Schubert int32_t second;
35125796c8dcSSimon Schubert int32_t hash;
35135796c8dcSSimon Schubert
35145796c8dcSSimon Schubert table_size =
35155796c8dcSSimon Schubert _NL_CURRENT_WORD (LC_COLLATE,
35165796c8dcSSimon Schubert _NL_COLLATE_SYMB_HASH_SIZEMB);
35175796c8dcSSimon Schubert symb_table = (const int32_t *)
35185796c8dcSSimon Schubert _NL_CURRENT (LC_COLLATE,
35195796c8dcSSimon Schubert _NL_COLLATE_SYMB_TABLEMB);
35205796c8dcSSimon Schubert extra = (const unsigned char *)
35215796c8dcSSimon Schubert _NL_CURRENT (LC_COLLATE,
35225796c8dcSSimon Schubert _NL_COLLATE_SYMB_EXTRAMB);
35235796c8dcSSimon Schubert
35245796c8dcSSimon Schubert /* Locate the character in the hashing table. */
35255796c8dcSSimon Schubert hash = elem_hash (str, c1);
35265796c8dcSSimon Schubert
35275796c8dcSSimon Schubert idx = 0;
35285796c8dcSSimon Schubert elem = hash % table_size;
35295796c8dcSSimon Schubert second = hash % (table_size - 2);
35305796c8dcSSimon Schubert while (symb_table[2 * elem] != 0)
35315796c8dcSSimon Schubert {
35325796c8dcSSimon Schubert /* First compare the hashing value. */
35335796c8dcSSimon Schubert if (symb_table[2 * elem] == hash
35345796c8dcSSimon Schubert && c1 == extra[symb_table[2 * elem + 1]]
35355796c8dcSSimon Schubert && memcmp (str,
35365796c8dcSSimon Schubert &extra[symb_table[2 * elem + 1]
35375796c8dcSSimon Schubert + 1],
35385796c8dcSSimon Schubert c1) == 0)
35395796c8dcSSimon Schubert {
35405796c8dcSSimon Schubert /* Yep, this is the entry. */
35415796c8dcSSimon Schubert idx = symb_table[2 * elem + 1];
35425796c8dcSSimon Schubert idx += 1 + extra[idx];
35435796c8dcSSimon Schubert break;
35445796c8dcSSimon Schubert }
35455796c8dcSSimon Schubert
35465796c8dcSSimon Schubert /* Next entry. */
35475796c8dcSSimon Schubert elem += second;
35485796c8dcSSimon Schubert }
35495796c8dcSSimon Schubert
35505796c8dcSSimon Schubert if (symb_table[2 * elem] == 0)
35515796c8dcSSimon Schubert /* This is no valid character. */
35525796c8dcSSimon Schubert FREE_STACK_RETURN (REG_ECOLLATE);
35535796c8dcSSimon Schubert
35545796c8dcSSimon Schubert /* Throw away the ] at the end of the equivalence
35555796c8dcSSimon Schubert class. */
35565796c8dcSSimon Schubert PATFETCH (c);
35575796c8dcSSimon Schubert
35585796c8dcSSimon Schubert /* Now add the multibyte character(s) we found
35595796c8dcSSimon Schubert to the accept list.
35605796c8dcSSimon Schubert
35615796c8dcSSimon Schubert XXX Note that this is not entirely correct.
35625796c8dcSSimon Schubert We would have to match multibyte sequences
35635796c8dcSSimon Schubert but this is not possible with the current
35645796c8dcSSimon Schubert implementation. Also, we have to match
35655796c8dcSSimon Schubert collating symbols, which expand to more than
35665796c8dcSSimon Schubert one byte, as a whole and not allow the
35675796c8dcSSimon Schubert individual bytes. */
35685796c8dcSSimon Schubert c1 = extra[idx++];
35695796c8dcSSimon Schubert if (c1 == 1)
35705796c8dcSSimon Schubert range_start = extra[idx];
35715796c8dcSSimon Schubert while (c1-- > 0)
35725796c8dcSSimon Schubert {
35735796c8dcSSimon Schubert SET_LIST_BIT (extra[idx]);
35745796c8dcSSimon Schubert ++idx;
35755796c8dcSSimon Schubert }
35765796c8dcSSimon Schubert }
35775796c8dcSSimon Schubert # endif
35785796c8dcSSimon Schubert had_char_class = false;
35795796c8dcSSimon Schubert }
35805796c8dcSSimon Schubert else
35815796c8dcSSimon Schubert {
35825796c8dcSSimon Schubert c1++;
35835796c8dcSSimon Schubert while (c1--)
35845796c8dcSSimon Schubert PATUNFETCH;
35855796c8dcSSimon Schubert SET_LIST_BIT ('[');
35865796c8dcSSimon Schubert SET_LIST_BIT ('.');
35875796c8dcSSimon Schubert range_start = '.';
35885796c8dcSSimon Schubert had_char_class = false;
35895796c8dcSSimon Schubert }
35905796c8dcSSimon Schubert }
35915796c8dcSSimon Schubert else
35925796c8dcSSimon Schubert {
35935796c8dcSSimon Schubert had_char_class = false;
35945796c8dcSSimon Schubert SET_LIST_BIT (c);
35955796c8dcSSimon Schubert range_start = c;
35965796c8dcSSimon Schubert }
35975796c8dcSSimon Schubert }
35985796c8dcSSimon Schubert
35995796c8dcSSimon Schubert /* Discard any (non)matching list bytes that are all 0 at the
36005796c8dcSSimon Schubert end of the map. Decrease the map-length byte too. */
36015796c8dcSSimon Schubert while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
36025796c8dcSSimon Schubert b[-1]--;
36035796c8dcSSimon Schubert b += b[-1];
36045796c8dcSSimon Schubert #endif /* WCHAR */
36055796c8dcSSimon Schubert }
36065796c8dcSSimon Schubert break;
36075796c8dcSSimon Schubert
36085796c8dcSSimon Schubert
36095796c8dcSSimon Schubert case '(':
36105796c8dcSSimon Schubert if (syntax & RE_NO_BK_PARENS)
36115796c8dcSSimon Schubert goto handle_open;
36125796c8dcSSimon Schubert else
36135796c8dcSSimon Schubert goto normal_char;
36145796c8dcSSimon Schubert
36155796c8dcSSimon Schubert
36165796c8dcSSimon Schubert case ')':
36175796c8dcSSimon Schubert if (syntax & RE_NO_BK_PARENS)
36185796c8dcSSimon Schubert goto handle_close;
36195796c8dcSSimon Schubert else
36205796c8dcSSimon Schubert goto normal_char;
36215796c8dcSSimon Schubert
36225796c8dcSSimon Schubert
36235796c8dcSSimon Schubert case '\n':
36245796c8dcSSimon Schubert if (syntax & RE_NEWLINE_ALT)
36255796c8dcSSimon Schubert goto handle_alt;
36265796c8dcSSimon Schubert else
36275796c8dcSSimon Schubert goto normal_char;
36285796c8dcSSimon Schubert
36295796c8dcSSimon Schubert
36305796c8dcSSimon Schubert case '|':
36315796c8dcSSimon Schubert if (syntax & RE_NO_BK_VBAR)
36325796c8dcSSimon Schubert goto handle_alt;
36335796c8dcSSimon Schubert else
36345796c8dcSSimon Schubert goto normal_char;
36355796c8dcSSimon Schubert
36365796c8dcSSimon Schubert
36375796c8dcSSimon Schubert case '{':
36385796c8dcSSimon Schubert if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
36395796c8dcSSimon Schubert goto handle_interval;
36405796c8dcSSimon Schubert else
36415796c8dcSSimon Schubert goto normal_char;
36425796c8dcSSimon Schubert
36435796c8dcSSimon Schubert
36445796c8dcSSimon Schubert case '\\':
36455796c8dcSSimon Schubert if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
36465796c8dcSSimon Schubert
36475796c8dcSSimon Schubert /* Do not translate the character after the \, so that we can
36485796c8dcSSimon Schubert distinguish, e.g., \B from \b, even if we normally would
36495796c8dcSSimon Schubert translate, e.g., B to b. */
36505796c8dcSSimon Schubert PATFETCH_RAW (c);
36515796c8dcSSimon Schubert
36525796c8dcSSimon Schubert switch (c)
36535796c8dcSSimon Schubert {
36545796c8dcSSimon Schubert case '(':
36555796c8dcSSimon Schubert if (syntax & RE_NO_BK_PARENS)
36565796c8dcSSimon Schubert goto normal_backslash;
36575796c8dcSSimon Schubert
36585796c8dcSSimon Schubert handle_open:
36595796c8dcSSimon Schubert bufp->re_nsub++;
36605796c8dcSSimon Schubert regnum++;
36615796c8dcSSimon Schubert
36625796c8dcSSimon Schubert if (COMPILE_STACK_FULL)
36635796c8dcSSimon Schubert {
36645796c8dcSSimon Schubert RETALLOC (compile_stack.stack, compile_stack.size << 1,
36655796c8dcSSimon Schubert compile_stack_elt_t);
36665796c8dcSSimon Schubert if (compile_stack.stack == NULL) return REG_ESPACE;
36675796c8dcSSimon Schubert
36685796c8dcSSimon Schubert compile_stack.size <<= 1;
36695796c8dcSSimon Schubert }
36705796c8dcSSimon Schubert
36715796c8dcSSimon Schubert /* These are the values to restore when we hit end of this
36725796c8dcSSimon Schubert group. They are all relative offsets, so that if the
36735796c8dcSSimon Schubert whole pattern moves because of realloc, they will still
36745796c8dcSSimon Schubert be valid. */
36755796c8dcSSimon Schubert COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
36765796c8dcSSimon Schubert COMPILE_STACK_TOP.fixup_alt_jump
36775796c8dcSSimon Schubert = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
36785796c8dcSSimon Schubert COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
36795796c8dcSSimon Schubert COMPILE_STACK_TOP.regnum = regnum;
36805796c8dcSSimon Schubert
36815796c8dcSSimon Schubert /* We will eventually replace the 0 with the number of
36825796c8dcSSimon Schubert groups inner to this one. But do not push a
36835796c8dcSSimon Schubert start_memory for groups beyond the last one we can
36845796c8dcSSimon Schubert represent in the compiled pattern. */
36855796c8dcSSimon Schubert if (regnum <= MAX_REGNUM)
36865796c8dcSSimon Schubert {
36875796c8dcSSimon Schubert COMPILE_STACK_TOP.inner_group_offset = b
36885796c8dcSSimon Schubert - COMPILED_BUFFER_VAR + 2;
36895796c8dcSSimon Schubert BUF_PUSH_3 (start_memory, regnum, 0);
36905796c8dcSSimon Schubert }
36915796c8dcSSimon Schubert
36925796c8dcSSimon Schubert compile_stack.avail++;
36935796c8dcSSimon Schubert
36945796c8dcSSimon Schubert fixup_alt_jump = 0;
36955796c8dcSSimon Schubert laststart = 0;
36965796c8dcSSimon Schubert begalt = b;
36975796c8dcSSimon Schubert /* If we've reached MAX_REGNUM groups, then this open
36985796c8dcSSimon Schubert won't actually generate any code, so we'll have to
36995796c8dcSSimon Schubert clear pending_exact explicitly. */
37005796c8dcSSimon Schubert pending_exact = 0;
37015796c8dcSSimon Schubert break;
37025796c8dcSSimon Schubert
37035796c8dcSSimon Schubert
37045796c8dcSSimon Schubert case ')':
37055796c8dcSSimon Schubert if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
37065796c8dcSSimon Schubert
37075796c8dcSSimon Schubert if (COMPILE_STACK_EMPTY)
37085796c8dcSSimon Schubert {
37095796c8dcSSimon Schubert if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
37105796c8dcSSimon Schubert goto normal_backslash;
37115796c8dcSSimon Schubert else
37125796c8dcSSimon Schubert FREE_STACK_RETURN (REG_ERPAREN);
37135796c8dcSSimon Schubert }
37145796c8dcSSimon Schubert
37155796c8dcSSimon Schubert handle_close:
37165796c8dcSSimon Schubert if (fixup_alt_jump)
37175796c8dcSSimon Schubert { /* Push a dummy failure point at the end of the
37185796c8dcSSimon Schubert alternative for a possible future
37195796c8dcSSimon Schubert `pop_failure_jump' to pop. See comments at
37205796c8dcSSimon Schubert `push_dummy_failure' in `re_match_2'. */
37215796c8dcSSimon Schubert BUF_PUSH (push_dummy_failure);
37225796c8dcSSimon Schubert
37235796c8dcSSimon Schubert /* We allocated space for this jump when we assigned
37245796c8dcSSimon Schubert to `fixup_alt_jump', in the `handle_alt' case below. */
37255796c8dcSSimon Schubert STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
37265796c8dcSSimon Schubert }
37275796c8dcSSimon Schubert
37285796c8dcSSimon Schubert /* See similar code for backslashed right paren above. */
37295796c8dcSSimon Schubert if (COMPILE_STACK_EMPTY)
37305796c8dcSSimon Schubert {
37315796c8dcSSimon Schubert if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
37325796c8dcSSimon Schubert goto normal_char;
37335796c8dcSSimon Schubert else
37345796c8dcSSimon Schubert FREE_STACK_RETURN (REG_ERPAREN);
37355796c8dcSSimon Schubert }
37365796c8dcSSimon Schubert
37375796c8dcSSimon Schubert /* Since we just checked for an empty stack above, this
37385796c8dcSSimon Schubert ``can't happen''. */
37395796c8dcSSimon Schubert assert (compile_stack.avail != 0);
37405796c8dcSSimon Schubert {
37415796c8dcSSimon Schubert /* We don't just want to restore into `regnum', because
37425796c8dcSSimon Schubert later groups should continue to be numbered higher,
37435796c8dcSSimon Schubert as in `(ab)c(de)' -- the second group is #2. */
37445796c8dcSSimon Schubert regnum_t this_group_regnum;
37455796c8dcSSimon Schubert
37465796c8dcSSimon Schubert compile_stack.avail--;
37475796c8dcSSimon Schubert begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
37485796c8dcSSimon Schubert fixup_alt_jump
37495796c8dcSSimon Schubert = COMPILE_STACK_TOP.fixup_alt_jump
37505796c8dcSSimon Schubert ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
37515796c8dcSSimon Schubert : 0;
37525796c8dcSSimon Schubert laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
37535796c8dcSSimon Schubert this_group_regnum = COMPILE_STACK_TOP.regnum;
37545796c8dcSSimon Schubert /* If we've reached MAX_REGNUM groups, then this close
37555796c8dcSSimon Schubert won't actually generate any code, so we'll have to
37565796c8dcSSimon Schubert clear pending_exact explicitly. */
37575796c8dcSSimon Schubert pending_exact = 0;
37585796c8dcSSimon Schubert
37595796c8dcSSimon Schubert /* We're at the end of the group, so now we know how many
37605796c8dcSSimon Schubert groups were inside this one. */
37615796c8dcSSimon Schubert if (this_group_regnum <= MAX_REGNUM)
37625796c8dcSSimon Schubert {
37635796c8dcSSimon Schubert UCHAR_T *inner_group_loc
37645796c8dcSSimon Schubert = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
37655796c8dcSSimon Schubert
37665796c8dcSSimon Schubert *inner_group_loc = regnum - this_group_regnum;
37675796c8dcSSimon Schubert BUF_PUSH_3 (stop_memory, this_group_regnum,
37685796c8dcSSimon Schubert regnum - this_group_regnum);
37695796c8dcSSimon Schubert }
37705796c8dcSSimon Schubert }
37715796c8dcSSimon Schubert break;
37725796c8dcSSimon Schubert
37735796c8dcSSimon Schubert
37745796c8dcSSimon Schubert case '|': /* `\|'. */
37755796c8dcSSimon Schubert if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
37765796c8dcSSimon Schubert goto normal_backslash;
37775796c8dcSSimon Schubert handle_alt:
37785796c8dcSSimon Schubert if (syntax & RE_LIMITED_OPS)
37795796c8dcSSimon Schubert goto normal_char;
37805796c8dcSSimon Schubert
37815796c8dcSSimon Schubert /* Insert before the previous alternative a jump which
37825796c8dcSSimon Schubert jumps to this alternative if the former fails. */
37835796c8dcSSimon Schubert GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
37845796c8dcSSimon Schubert INSERT_JUMP (on_failure_jump, begalt,
37855796c8dcSSimon Schubert b + 2 + 2 * OFFSET_ADDRESS_SIZE);
37865796c8dcSSimon Schubert pending_exact = 0;
37875796c8dcSSimon Schubert b += 1 + OFFSET_ADDRESS_SIZE;
37885796c8dcSSimon Schubert
37895796c8dcSSimon Schubert /* The alternative before this one has a jump after it
37905796c8dcSSimon Schubert which gets executed if it gets matched. Adjust that
37915796c8dcSSimon Schubert jump so it will jump to this alternative's analogous
37925796c8dcSSimon Schubert jump (put in below, which in turn will jump to the next
37935796c8dcSSimon Schubert (if any) alternative's such jump, etc.). The last such
37945796c8dcSSimon Schubert jump jumps to the correct final destination. A picture:
37955796c8dcSSimon Schubert _____ _____
37965796c8dcSSimon Schubert | | | |
37975796c8dcSSimon Schubert | v | v
37985796c8dcSSimon Schubert a | b | c
37995796c8dcSSimon Schubert
38005796c8dcSSimon Schubert If we are at `b', then fixup_alt_jump right now points to a
38015796c8dcSSimon Schubert three-byte space after `a'. We'll put in the jump, set
38025796c8dcSSimon Schubert fixup_alt_jump to right after `b', and leave behind three
38035796c8dcSSimon Schubert bytes which we'll fill in when we get to after `c'. */
38045796c8dcSSimon Schubert
38055796c8dcSSimon Schubert if (fixup_alt_jump)
38065796c8dcSSimon Schubert STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
38075796c8dcSSimon Schubert
38085796c8dcSSimon Schubert /* Mark and leave space for a jump after this alternative,
38095796c8dcSSimon Schubert to be filled in later either by the next alternative or
38105796c8dcSSimon Schubert when we know we're at the end of a series of alternatives. */
38115796c8dcSSimon Schubert fixup_alt_jump = b;
38125796c8dcSSimon Schubert GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
38135796c8dcSSimon Schubert b += 1 + OFFSET_ADDRESS_SIZE;
38145796c8dcSSimon Schubert
38155796c8dcSSimon Schubert laststart = 0;
38165796c8dcSSimon Schubert begalt = b;
38175796c8dcSSimon Schubert break;
38185796c8dcSSimon Schubert
38195796c8dcSSimon Schubert
38205796c8dcSSimon Schubert case '{':
38215796c8dcSSimon Schubert /* If \{ is a literal. */
38225796c8dcSSimon Schubert if (!(syntax & RE_INTERVALS)
38235796c8dcSSimon Schubert /* If we're at `\{' and it's not the open-interval
38245796c8dcSSimon Schubert operator. */
38255796c8dcSSimon Schubert || (syntax & RE_NO_BK_BRACES))
38265796c8dcSSimon Schubert goto normal_backslash;
38275796c8dcSSimon Schubert
38285796c8dcSSimon Schubert handle_interval:
38295796c8dcSSimon Schubert {
38305796c8dcSSimon Schubert /* If got here, then the syntax allows intervals. */
38315796c8dcSSimon Schubert
38325796c8dcSSimon Schubert /* At least (most) this many matches must be made. */
38335796c8dcSSimon Schubert int lower_bound = -1, upper_bound = -1;
38345796c8dcSSimon Schubert
38355796c8dcSSimon Schubert /* Place in the uncompiled pattern (i.e., just after
38365796c8dcSSimon Schubert the '{') to go back to if the interval is invalid. */
38375796c8dcSSimon Schubert const CHAR_T *beg_interval = p;
38385796c8dcSSimon Schubert
38395796c8dcSSimon Schubert if (p == pend)
38405796c8dcSSimon Schubert goto invalid_interval;
38415796c8dcSSimon Schubert
38425796c8dcSSimon Schubert GET_UNSIGNED_NUMBER (lower_bound);
38435796c8dcSSimon Schubert
38445796c8dcSSimon Schubert if (c == ',')
38455796c8dcSSimon Schubert {
38465796c8dcSSimon Schubert GET_UNSIGNED_NUMBER (upper_bound);
38475796c8dcSSimon Schubert if (upper_bound < 0)
38485796c8dcSSimon Schubert upper_bound = RE_DUP_MAX;
38495796c8dcSSimon Schubert }
38505796c8dcSSimon Schubert else
38515796c8dcSSimon Schubert /* Interval such as `{1}' => match exactly once. */
38525796c8dcSSimon Schubert upper_bound = lower_bound;
38535796c8dcSSimon Schubert
38545796c8dcSSimon Schubert if (! (0 <= lower_bound && lower_bound <= upper_bound))
38555796c8dcSSimon Schubert goto invalid_interval;
38565796c8dcSSimon Schubert
38575796c8dcSSimon Schubert if (!(syntax & RE_NO_BK_BRACES))
38585796c8dcSSimon Schubert {
38595796c8dcSSimon Schubert if (c != '\\' || p == pend)
38605796c8dcSSimon Schubert goto invalid_interval;
38615796c8dcSSimon Schubert PATFETCH (c);
38625796c8dcSSimon Schubert }
38635796c8dcSSimon Schubert
38645796c8dcSSimon Schubert if (c != '}')
38655796c8dcSSimon Schubert goto invalid_interval;
38665796c8dcSSimon Schubert
38675796c8dcSSimon Schubert /* If it's invalid to have no preceding re. */
38685796c8dcSSimon Schubert if (!laststart)
38695796c8dcSSimon Schubert {
38705796c8dcSSimon Schubert if (syntax & RE_CONTEXT_INVALID_OPS
38715796c8dcSSimon Schubert && !(syntax & RE_INVALID_INTERVAL_ORD))
38725796c8dcSSimon Schubert FREE_STACK_RETURN (REG_BADRPT);
38735796c8dcSSimon Schubert else if (syntax & RE_CONTEXT_INDEP_OPS)
38745796c8dcSSimon Schubert laststart = b;
38755796c8dcSSimon Schubert else
38765796c8dcSSimon Schubert goto unfetch_interval;
38775796c8dcSSimon Schubert }
38785796c8dcSSimon Schubert
38795796c8dcSSimon Schubert /* We just parsed a valid interval. */
38805796c8dcSSimon Schubert
38815796c8dcSSimon Schubert if (RE_DUP_MAX < upper_bound)
38825796c8dcSSimon Schubert FREE_STACK_RETURN (REG_BADBR);
38835796c8dcSSimon Schubert
38845796c8dcSSimon Schubert /* If the upper bound is zero, don't want to succeed at
38855796c8dcSSimon Schubert all; jump from `laststart' to `b + 3', which will be
38865796c8dcSSimon Schubert the end of the buffer after we insert the jump. */
38875796c8dcSSimon Schubert /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE'
38885796c8dcSSimon Schubert instead of 'b + 3'. */
38895796c8dcSSimon Schubert if (upper_bound == 0)
38905796c8dcSSimon Schubert {
38915796c8dcSSimon Schubert GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
38925796c8dcSSimon Schubert INSERT_JUMP (jump, laststart, b + 1
38935796c8dcSSimon Schubert + OFFSET_ADDRESS_SIZE);
38945796c8dcSSimon Schubert b += 1 + OFFSET_ADDRESS_SIZE;
38955796c8dcSSimon Schubert }
38965796c8dcSSimon Schubert
38975796c8dcSSimon Schubert /* Otherwise, we have a nontrivial interval. When
38985796c8dcSSimon Schubert we're all done, the pattern will look like:
38995796c8dcSSimon Schubert set_number_at <jump count> <upper bound>
39005796c8dcSSimon Schubert set_number_at <succeed_n count> <lower bound>
39015796c8dcSSimon Schubert succeed_n <after jump addr> <succeed_n count>
39025796c8dcSSimon Schubert <body of loop>
39035796c8dcSSimon Schubert jump_n <succeed_n addr> <jump count>
39045796c8dcSSimon Schubert (The upper bound and `jump_n' are omitted if
39055796c8dcSSimon Schubert `upper_bound' is 1, though.) */
39065796c8dcSSimon Schubert else
39075796c8dcSSimon Schubert { /* If the upper bound is > 1, we need to insert
39085796c8dcSSimon Schubert more at the end of the loop. */
39095796c8dcSSimon Schubert unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
39105796c8dcSSimon Schubert (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
39115796c8dcSSimon Schubert
39125796c8dcSSimon Schubert GET_BUFFER_SPACE (nbytes);
39135796c8dcSSimon Schubert
39145796c8dcSSimon Schubert /* Initialize lower bound of the `succeed_n', even
39155796c8dcSSimon Schubert though it will be set during matching by its
39165796c8dcSSimon Schubert attendant `set_number_at' (inserted next),
39175796c8dcSSimon Schubert because `re_compile_fastmap' needs to know.
39185796c8dcSSimon Schubert Jump to the `jump_n' we might insert below. */
39195796c8dcSSimon Schubert INSERT_JUMP2 (succeed_n, laststart,
39205796c8dcSSimon Schubert b + 1 + 2 * OFFSET_ADDRESS_SIZE
39215796c8dcSSimon Schubert + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
39225796c8dcSSimon Schubert , lower_bound);
39235796c8dcSSimon Schubert b += 1 + 2 * OFFSET_ADDRESS_SIZE;
39245796c8dcSSimon Schubert
39255796c8dcSSimon Schubert /* Code to initialize the lower bound. Insert
39265796c8dcSSimon Schubert before the `succeed_n'. The `5' is the last two
39275796c8dcSSimon Schubert bytes of this `set_number_at', plus 3 bytes of
39285796c8dcSSimon Schubert the following `succeed_n'. */
39295796c8dcSSimon Schubert /* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE'
39305796c8dcSSimon Schubert is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
39315796c8dcSSimon Schubert of the following `succeed_n'. */
39325796c8dcSSimon Schubert PREFIX(insert_op2) (set_number_at, laststart, 1
39335796c8dcSSimon Schubert + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
39345796c8dcSSimon Schubert b += 1 + 2 * OFFSET_ADDRESS_SIZE;
39355796c8dcSSimon Schubert
39365796c8dcSSimon Schubert if (upper_bound > 1)
39375796c8dcSSimon Schubert { /* More than one repetition is allowed, so
39385796c8dcSSimon Schubert append a backward jump to the `succeed_n'
39395796c8dcSSimon Schubert that starts this interval.
39405796c8dcSSimon Schubert
39415796c8dcSSimon Schubert When we've reached this during matching,
39425796c8dcSSimon Schubert we'll have matched the interval once, so
39435796c8dcSSimon Schubert jump back only `upper_bound - 1' times. */
39445796c8dcSSimon Schubert STORE_JUMP2 (jump_n, b, laststart
39455796c8dcSSimon Schubert + 2 * OFFSET_ADDRESS_SIZE + 1,
39465796c8dcSSimon Schubert upper_bound - 1);
39475796c8dcSSimon Schubert b += 1 + 2 * OFFSET_ADDRESS_SIZE;
39485796c8dcSSimon Schubert
39495796c8dcSSimon Schubert /* The location we want to set is the second
39505796c8dcSSimon Schubert parameter of the `jump_n'; that is `b-2' as
39515796c8dcSSimon Schubert an absolute address. `laststart' will be
39525796c8dcSSimon Schubert the `set_number_at' we're about to insert;
39535796c8dcSSimon Schubert `laststart+3' the number to set, the source
39545796c8dcSSimon Schubert for the relative address. But we are
39555796c8dcSSimon Schubert inserting into the middle of the pattern --
39565796c8dcSSimon Schubert so everything is getting moved up by 5.
39575796c8dcSSimon Schubert Conclusion: (b - 2) - (laststart + 3) + 5,
39585796c8dcSSimon Schubert i.e., b - laststart.
39595796c8dcSSimon Schubert
39605796c8dcSSimon Schubert We insert this at the beginning of the loop
39615796c8dcSSimon Schubert so that if we fail during matching, we'll
39625796c8dcSSimon Schubert reinitialize the bounds. */
39635796c8dcSSimon Schubert PREFIX(insert_op2) (set_number_at, laststart,
39645796c8dcSSimon Schubert b - laststart,
39655796c8dcSSimon Schubert upper_bound - 1, b);
39665796c8dcSSimon Schubert b += 1 + 2 * OFFSET_ADDRESS_SIZE;
39675796c8dcSSimon Schubert }
39685796c8dcSSimon Schubert }
39695796c8dcSSimon Schubert pending_exact = 0;
39705796c8dcSSimon Schubert break;
39715796c8dcSSimon Schubert
39725796c8dcSSimon Schubert invalid_interval:
39735796c8dcSSimon Schubert if (!(syntax & RE_INVALID_INTERVAL_ORD))
39745796c8dcSSimon Schubert FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
39755796c8dcSSimon Schubert unfetch_interval:
39765796c8dcSSimon Schubert /* Match the characters as literals. */
39775796c8dcSSimon Schubert p = beg_interval;
39785796c8dcSSimon Schubert c = '{';
39795796c8dcSSimon Schubert if (syntax & RE_NO_BK_BRACES)
39805796c8dcSSimon Schubert goto normal_char;
39815796c8dcSSimon Schubert else
39825796c8dcSSimon Schubert goto normal_backslash;
39835796c8dcSSimon Schubert }
39845796c8dcSSimon Schubert
39855796c8dcSSimon Schubert #ifdef emacs
39865796c8dcSSimon Schubert /* There is no way to specify the before_dot and after_dot
39875796c8dcSSimon Schubert operators. rms says this is ok. --karl */
39885796c8dcSSimon Schubert case '=':
39895796c8dcSSimon Schubert BUF_PUSH (at_dot);
39905796c8dcSSimon Schubert break;
39915796c8dcSSimon Schubert
39925796c8dcSSimon Schubert case 's':
39935796c8dcSSimon Schubert laststart = b;
39945796c8dcSSimon Schubert PATFETCH (c);
39955796c8dcSSimon Schubert BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
39965796c8dcSSimon Schubert break;
39975796c8dcSSimon Schubert
39985796c8dcSSimon Schubert case 'S':
39995796c8dcSSimon Schubert laststart = b;
40005796c8dcSSimon Schubert PATFETCH (c);
40015796c8dcSSimon Schubert BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
40025796c8dcSSimon Schubert break;
40035796c8dcSSimon Schubert #endif /* emacs */
40045796c8dcSSimon Schubert
40055796c8dcSSimon Schubert
40065796c8dcSSimon Schubert case 'w':
40075796c8dcSSimon Schubert if (syntax & RE_NO_GNU_OPS)
40085796c8dcSSimon Schubert goto normal_char;
40095796c8dcSSimon Schubert laststart = b;
40105796c8dcSSimon Schubert BUF_PUSH (wordchar);
40115796c8dcSSimon Schubert break;
40125796c8dcSSimon Schubert
40135796c8dcSSimon Schubert
40145796c8dcSSimon Schubert case 'W':
40155796c8dcSSimon Schubert if (syntax & RE_NO_GNU_OPS)
40165796c8dcSSimon Schubert goto normal_char;
40175796c8dcSSimon Schubert laststart = b;
40185796c8dcSSimon Schubert BUF_PUSH (notwordchar);
40195796c8dcSSimon Schubert break;
40205796c8dcSSimon Schubert
40215796c8dcSSimon Schubert
40225796c8dcSSimon Schubert case '<':
40235796c8dcSSimon Schubert if (syntax & RE_NO_GNU_OPS)
40245796c8dcSSimon Schubert goto normal_char;
40255796c8dcSSimon Schubert BUF_PUSH (wordbeg);
40265796c8dcSSimon Schubert break;
40275796c8dcSSimon Schubert
40285796c8dcSSimon Schubert case '>':
40295796c8dcSSimon Schubert if (syntax & RE_NO_GNU_OPS)
40305796c8dcSSimon Schubert goto normal_char;
40315796c8dcSSimon Schubert BUF_PUSH (wordend);
40325796c8dcSSimon Schubert break;
40335796c8dcSSimon Schubert
40345796c8dcSSimon Schubert case 'b':
40355796c8dcSSimon Schubert if (syntax & RE_NO_GNU_OPS)
40365796c8dcSSimon Schubert goto normal_char;
40375796c8dcSSimon Schubert BUF_PUSH (wordbound);
40385796c8dcSSimon Schubert break;
40395796c8dcSSimon Schubert
40405796c8dcSSimon Schubert case 'B':
40415796c8dcSSimon Schubert if (syntax & RE_NO_GNU_OPS)
40425796c8dcSSimon Schubert goto normal_char;
40435796c8dcSSimon Schubert BUF_PUSH (notwordbound);
40445796c8dcSSimon Schubert break;
40455796c8dcSSimon Schubert
40465796c8dcSSimon Schubert case '`':
40475796c8dcSSimon Schubert if (syntax & RE_NO_GNU_OPS)
40485796c8dcSSimon Schubert goto normal_char;
40495796c8dcSSimon Schubert BUF_PUSH (begbuf);
40505796c8dcSSimon Schubert break;
40515796c8dcSSimon Schubert
40525796c8dcSSimon Schubert case '\'':
40535796c8dcSSimon Schubert if (syntax & RE_NO_GNU_OPS)
40545796c8dcSSimon Schubert goto normal_char;
40555796c8dcSSimon Schubert BUF_PUSH (endbuf);
40565796c8dcSSimon Schubert break;
40575796c8dcSSimon Schubert
40585796c8dcSSimon Schubert case '1': case '2': case '3': case '4': case '5':
40595796c8dcSSimon Schubert case '6': case '7': case '8': case '9':
40605796c8dcSSimon Schubert if (syntax & RE_NO_BK_REFS)
40615796c8dcSSimon Schubert goto normal_char;
40625796c8dcSSimon Schubert
40635796c8dcSSimon Schubert c1 = c - '0';
40645796c8dcSSimon Schubert
40655796c8dcSSimon Schubert if (c1 > regnum)
40665796c8dcSSimon Schubert FREE_STACK_RETURN (REG_ESUBREG);
40675796c8dcSSimon Schubert
40685796c8dcSSimon Schubert /* Can't back reference to a subexpression if inside of it. */
40695796c8dcSSimon Schubert if (group_in_compile_stack (compile_stack, (regnum_t) c1))
40705796c8dcSSimon Schubert goto normal_char;
40715796c8dcSSimon Schubert
40725796c8dcSSimon Schubert laststart = b;
40735796c8dcSSimon Schubert BUF_PUSH_2 (duplicate, c1);
40745796c8dcSSimon Schubert break;
40755796c8dcSSimon Schubert
40765796c8dcSSimon Schubert
40775796c8dcSSimon Schubert case '+':
40785796c8dcSSimon Schubert case '?':
40795796c8dcSSimon Schubert if (syntax & RE_BK_PLUS_QM)
40805796c8dcSSimon Schubert goto handle_plus;
40815796c8dcSSimon Schubert else
40825796c8dcSSimon Schubert goto normal_backslash;
40835796c8dcSSimon Schubert
40845796c8dcSSimon Schubert default:
40855796c8dcSSimon Schubert normal_backslash:
40865796c8dcSSimon Schubert /* You might think it would be useful for \ to mean
40875796c8dcSSimon Schubert not to translate; but if we don't translate it
40885796c8dcSSimon Schubert it will never match anything. */
40895796c8dcSSimon Schubert c = TRANSLATE (c);
40905796c8dcSSimon Schubert goto normal_char;
40915796c8dcSSimon Schubert }
40925796c8dcSSimon Schubert break;
40935796c8dcSSimon Schubert
40945796c8dcSSimon Schubert
40955796c8dcSSimon Schubert default:
40965796c8dcSSimon Schubert /* Expects the character in `c'. */
40975796c8dcSSimon Schubert normal_char:
40985796c8dcSSimon Schubert /* If no exactn currently being built. */
40995796c8dcSSimon Schubert if (!pending_exact
41005796c8dcSSimon Schubert #ifdef WCHAR
41015796c8dcSSimon Schubert /* If the last exactn handles binary data (or characters) and
41025796c8dcSSimon Schubert the new exactn handles characters (or binary data). */
41035796c8dcSSimon Schubert || is_exactn_bin != is_binary[p - 1 - pattern]
41045796c8dcSSimon Schubert #endif /* WCHAR */
41055796c8dcSSimon Schubert
41065796c8dcSSimon Schubert /* If last exactn not at current position. */
41075796c8dcSSimon Schubert || pending_exact + *pending_exact + 1 != b
41085796c8dcSSimon Schubert
41095796c8dcSSimon Schubert /* We have only one byte following the exactn for the count. */
41105796c8dcSSimon Schubert || *pending_exact == (1 << BYTEWIDTH) - 1
41115796c8dcSSimon Schubert
41125796c8dcSSimon Schubert /* If followed by a repetition operator. */
41135796c8dcSSimon Schubert || *p == '*' || *p == '^'
41145796c8dcSSimon Schubert || ((syntax & RE_BK_PLUS_QM)
41155796c8dcSSimon Schubert ? *p == '\\' && (p[1] == '+' || p[1] == '?')
41165796c8dcSSimon Schubert : (*p == '+' || *p == '?'))
41175796c8dcSSimon Schubert || ((syntax & RE_INTERVALS)
41185796c8dcSSimon Schubert && ((syntax & RE_NO_BK_BRACES)
41195796c8dcSSimon Schubert ? *p == '{'
41205796c8dcSSimon Schubert : (p[0] == '\\' && p[1] == '{'))))
41215796c8dcSSimon Schubert {
41225796c8dcSSimon Schubert /* Start building a new exactn. */
41235796c8dcSSimon Schubert
41245796c8dcSSimon Schubert laststart = b;
41255796c8dcSSimon Schubert
41265796c8dcSSimon Schubert #ifdef WCHAR
41275796c8dcSSimon Schubert /* Is this exactn binary data or character? */
41285796c8dcSSimon Schubert is_exactn_bin = is_binary[p - 1 - pattern];
41295796c8dcSSimon Schubert if (is_exactn_bin)
41305796c8dcSSimon Schubert BUF_PUSH_2 (exactn_bin, 0);
41315796c8dcSSimon Schubert else
41325796c8dcSSimon Schubert BUF_PUSH_2 (exactn, 0);
41335796c8dcSSimon Schubert #else
41345796c8dcSSimon Schubert BUF_PUSH_2 (exactn, 0);
41355796c8dcSSimon Schubert #endif /* WCHAR */
41365796c8dcSSimon Schubert pending_exact = b - 1;
41375796c8dcSSimon Schubert }
41385796c8dcSSimon Schubert
41395796c8dcSSimon Schubert BUF_PUSH (c);
41405796c8dcSSimon Schubert (*pending_exact)++;
41415796c8dcSSimon Schubert break;
41425796c8dcSSimon Schubert } /* switch (c) */
41435796c8dcSSimon Schubert } /* while p != pend */
41445796c8dcSSimon Schubert
41455796c8dcSSimon Schubert
41465796c8dcSSimon Schubert /* Through the pattern now. */
41475796c8dcSSimon Schubert
41485796c8dcSSimon Schubert if (fixup_alt_jump)
41495796c8dcSSimon Schubert STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
41505796c8dcSSimon Schubert
41515796c8dcSSimon Schubert if (!COMPILE_STACK_EMPTY)
41525796c8dcSSimon Schubert FREE_STACK_RETURN (REG_EPAREN);
41535796c8dcSSimon Schubert
41545796c8dcSSimon Schubert /* If we don't want backtracking, force success
41555796c8dcSSimon Schubert the first time we reach the end of the compiled pattern. */
41565796c8dcSSimon Schubert if (syntax & RE_NO_POSIX_BACKTRACKING)
41575796c8dcSSimon Schubert BUF_PUSH (succeed);
41585796c8dcSSimon Schubert
41595796c8dcSSimon Schubert #ifdef WCHAR
41605796c8dcSSimon Schubert free (pattern);
41615796c8dcSSimon Schubert free (mbs_offset);
41625796c8dcSSimon Schubert free (is_binary);
41635796c8dcSSimon Schubert #endif
41645796c8dcSSimon Schubert free (compile_stack.stack);
41655796c8dcSSimon Schubert
41665796c8dcSSimon Schubert /* We have succeeded; set the length of the buffer. */
41675796c8dcSSimon Schubert #ifdef WCHAR
41685796c8dcSSimon Schubert bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
41695796c8dcSSimon Schubert #else
41705796c8dcSSimon Schubert bufp->used = b - bufp->buffer;
41715796c8dcSSimon Schubert #endif
41725796c8dcSSimon Schubert
41735796c8dcSSimon Schubert #ifdef DEBUG
41745796c8dcSSimon Schubert if (debug)
41755796c8dcSSimon Schubert {
41765796c8dcSSimon Schubert DEBUG_PRINT1 ("\nCompiled pattern: \n");
41775796c8dcSSimon Schubert PREFIX(print_compiled_pattern) (bufp);
41785796c8dcSSimon Schubert }
41795796c8dcSSimon Schubert #endif /* DEBUG */
41805796c8dcSSimon Schubert
41815796c8dcSSimon Schubert #ifndef MATCH_MAY_ALLOCATE
41825796c8dcSSimon Schubert /* Initialize the failure stack to the largest possible stack. This
41835796c8dcSSimon Schubert isn't necessary unless we're trying to avoid calling alloca in
41845796c8dcSSimon Schubert the search and match routines. */
41855796c8dcSSimon Schubert {
41865796c8dcSSimon Schubert int num_regs = bufp->re_nsub + 1;
41875796c8dcSSimon Schubert
41885796c8dcSSimon Schubert /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
41895796c8dcSSimon Schubert is strictly greater than re_max_failures, the largest possible stack
41905796c8dcSSimon Schubert is 2 * re_max_failures failure points. */
41915796c8dcSSimon Schubert if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
41925796c8dcSSimon Schubert {
41935796c8dcSSimon Schubert fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
41945796c8dcSSimon Schubert
41955796c8dcSSimon Schubert # ifdef emacs
41965796c8dcSSimon Schubert if (! fail_stack.stack)
41975796c8dcSSimon Schubert fail_stack.stack
41985796c8dcSSimon Schubert = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size
41995796c8dcSSimon Schubert * sizeof (PREFIX(fail_stack_elt_t)));
42005796c8dcSSimon Schubert else
42015796c8dcSSimon Schubert fail_stack.stack
42025796c8dcSSimon Schubert = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack,
42035796c8dcSSimon Schubert (fail_stack.size
42045796c8dcSSimon Schubert * sizeof (PREFIX(fail_stack_elt_t))));
42055796c8dcSSimon Schubert # else /* not emacs */
42065796c8dcSSimon Schubert if (! fail_stack.stack)
42075796c8dcSSimon Schubert fail_stack.stack
42085796c8dcSSimon Schubert = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size
42095796c8dcSSimon Schubert * sizeof (PREFIX(fail_stack_elt_t)));
42105796c8dcSSimon Schubert else
42115796c8dcSSimon Schubert fail_stack.stack
42125796c8dcSSimon Schubert = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack,
42135796c8dcSSimon Schubert (fail_stack.size
42145796c8dcSSimon Schubert * sizeof (PREFIX(fail_stack_elt_t))));
42155796c8dcSSimon Schubert # endif /* not emacs */
42165796c8dcSSimon Schubert }
42175796c8dcSSimon Schubert
42185796c8dcSSimon Schubert PREFIX(regex_grow_registers) (num_regs);
42195796c8dcSSimon Schubert }
42205796c8dcSSimon Schubert #endif /* not MATCH_MAY_ALLOCATE */
42215796c8dcSSimon Schubert
42225796c8dcSSimon Schubert return REG_NOERROR;
42235796c8dcSSimon Schubert } /* regex_compile */
42245796c8dcSSimon Schubert
42255796c8dcSSimon Schubert /* Subroutines for `regex_compile'. */
42265796c8dcSSimon Schubert
42275796c8dcSSimon Schubert /* Store OP at LOC followed by two-byte integer parameter ARG. */
42285796c8dcSSimon Schubert /* ifdef WCHAR, integer parameter is 1 wchar_t. */
42295796c8dcSSimon Schubert
42305796c8dcSSimon Schubert static void
PREFIX(store_op1)42315796c8dcSSimon Schubert PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg)
42325796c8dcSSimon Schubert {
42335796c8dcSSimon Schubert *loc = (UCHAR_T) op;
42345796c8dcSSimon Schubert STORE_NUMBER (loc + 1, arg);
42355796c8dcSSimon Schubert }
42365796c8dcSSimon Schubert
42375796c8dcSSimon Schubert
42385796c8dcSSimon Schubert /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
42395796c8dcSSimon Schubert /* ifdef WCHAR, integer parameter is 1 wchar_t. */
42405796c8dcSSimon Schubert
42415796c8dcSSimon Schubert static void
PREFIX(store_op2)42425796c8dcSSimon Schubert PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc, int arg1, int arg2)
42435796c8dcSSimon Schubert {
42445796c8dcSSimon Schubert *loc = (UCHAR_T) op;
42455796c8dcSSimon Schubert STORE_NUMBER (loc + 1, arg1);
42465796c8dcSSimon Schubert STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
42475796c8dcSSimon Schubert }
42485796c8dcSSimon Schubert
42495796c8dcSSimon Schubert
42505796c8dcSSimon Schubert /* Copy the bytes from LOC to END to open up three bytes of space at LOC
42515796c8dcSSimon Schubert for OP followed by two-byte integer parameter ARG. */
42525796c8dcSSimon Schubert /* ifdef WCHAR, integer parameter is 1 wchar_t. */
42535796c8dcSSimon Schubert
42545796c8dcSSimon Schubert static void
PREFIX(insert_op1)42555796c8dcSSimon Schubert PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc, int arg, UCHAR_T *end)
42565796c8dcSSimon Schubert {
42575796c8dcSSimon Schubert register UCHAR_T *pfrom = end;
42585796c8dcSSimon Schubert register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE;
42595796c8dcSSimon Schubert
42605796c8dcSSimon Schubert while (pfrom != loc)
42615796c8dcSSimon Schubert *--pto = *--pfrom;
42625796c8dcSSimon Schubert
42635796c8dcSSimon Schubert PREFIX(store_op1) (op, loc, arg);
42645796c8dcSSimon Schubert }
42655796c8dcSSimon Schubert
42665796c8dcSSimon Schubert
42675796c8dcSSimon Schubert /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
42685796c8dcSSimon Schubert /* ifdef WCHAR, integer parameter is 1 wchar_t. */
42695796c8dcSSimon Schubert
42705796c8dcSSimon Schubert static void
PREFIX(insert_op2)42715796c8dcSSimon Schubert PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc, int arg1,
42725796c8dcSSimon Schubert int arg2, UCHAR_T *end)
42735796c8dcSSimon Schubert {
42745796c8dcSSimon Schubert register UCHAR_T *pfrom = end;
42755796c8dcSSimon Schubert register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE;
42765796c8dcSSimon Schubert
42775796c8dcSSimon Schubert while (pfrom != loc)
42785796c8dcSSimon Schubert *--pto = *--pfrom;
42795796c8dcSSimon Schubert
42805796c8dcSSimon Schubert PREFIX(store_op2) (op, loc, arg1, arg2);
42815796c8dcSSimon Schubert }
42825796c8dcSSimon Schubert
42835796c8dcSSimon Schubert
42845796c8dcSSimon Schubert /* P points to just after a ^ in PATTERN. Return true if that ^ comes
42855796c8dcSSimon Schubert after an alternative or a begin-subexpression. We assume there is at
42865796c8dcSSimon Schubert least one character before the ^. */
42875796c8dcSSimon Schubert
42885796c8dcSSimon Schubert static boolean
PREFIX(at_begline_loc_p)42895796c8dcSSimon Schubert PREFIX(at_begline_loc_p) (const CHAR_T *pattern, const CHAR_T *p,
42905796c8dcSSimon Schubert reg_syntax_t syntax)
42915796c8dcSSimon Schubert {
42925796c8dcSSimon Schubert const CHAR_T *prev = p - 2;
42935796c8dcSSimon Schubert boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
42945796c8dcSSimon Schubert
42955796c8dcSSimon Schubert return
42965796c8dcSSimon Schubert /* After a subexpression? */
42975796c8dcSSimon Schubert (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
42985796c8dcSSimon Schubert /* After an alternative? */
42995796c8dcSSimon Schubert || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
43005796c8dcSSimon Schubert }
43015796c8dcSSimon Schubert
43025796c8dcSSimon Schubert
43035796c8dcSSimon Schubert /* The dual of at_begline_loc_p. This one is for $. We assume there is
43045796c8dcSSimon Schubert at least one character after the $, i.e., `P < PEND'. */
43055796c8dcSSimon Schubert
43065796c8dcSSimon Schubert static boolean
PREFIX(at_endline_loc_p)43075796c8dcSSimon Schubert PREFIX(at_endline_loc_p) (const CHAR_T *p, const CHAR_T *pend,
43085796c8dcSSimon Schubert reg_syntax_t syntax)
43095796c8dcSSimon Schubert {
43105796c8dcSSimon Schubert const CHAR_T *next = p;
43115796c8dcSSimon Schubert boolean next_backslash = *next == '\\';
43125796c8dcSSimon Schubert const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0;
43135796c8dcSSimon Schubert
43145796c8dcSSimon Schubert return
43155796c8dcSSimon Schubert /* Before a subexpression? */
43165796c8dcSSimon Schubert (syntax & RE_NO_BK_PARENS ? *next == ')'
43175796c8dcSSimon Schubert : next_backslash && next_next && *next_next == ')')
43185796c8dcSSimon Schubert /* Before an alternative? */
43195796c8dcSSimon Schubert || (syntax & RE_NO_BK_VBAR ? *next == '|'
43205796c8dcSSimon Schubert : next_backslash && next_next && *next_next == '|');
43215796c8dcSSimon Schubert }
43225796c8dcSSimon Schubert
43235796c8dcSSimon Schubert #else /* not INSIDE_RECURSION */
43245796c8dcSSimon Schubert
43255796c8dcSSimon Schubert /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
43265796c8dcSSimon Schubert false if it's not. */
43275796c8dcSSimon Schubert
43285796c8dcSSimon Schubert static boolean
group_in_compile_stack(compile_stack_type compile_stack,regnum_t regnum)43295796c8dcSSimon Schubert group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
43305796c8dcSSimon Schubert {
43315796c8dcSSimon Schubert int this_element;
43325796c8dcSSimon Schubert
43335796c8dcSSimon Schubert for (this_element = compile_stack.avail - 1;
43345796c8dcSSimon Schubert this_element >= 0;
43355796c8dcSSimon Schubert this_element--)
43365796c8dcSSimon Schubert if (compile_stack.stack[this_element].regnum == regnum)
43375796c8dcSSimon Schubert return true;
43385796c8dcSSimon Schubert
43395796c8dcSSimon Schubert return false;
43405796c8dcSSimon Schubert }
43415796c8dcSSimon Schubert #endif /* not INSIDE_RECURSION */
43425796c8dcSSimon Schubert
43435796c8dcSSimon Schubert #ifdef INSIDE_RECURSION
43445796c8dcSSimon Schubert
43455796c8dcSSimon Schubert #ifdef WCHAR
43465796c8dcSSimon Schubert /* This insert space, which size is "num", into the pattern at "loc".
43475796c8dcSSimon Schubert "end" must point the end of the allocated buffer. */
43485796c8dcSSimon Schubert static void
insert_space(int num,CHAR_T * loc,CHAR_T * end)43495796c8dcSSimon Schubert insert_space (int num, CHAR_T *loc, CHAR_T *end)
43505796c8dcSSimon Schubert {
43515796c8dcSSimon Schubert register CHAR_T *pto = end;
43525796c8dcSSimon Schubert register CHAR_T *pfrom = end - num;
43535796c8dcSSimon Schubert
43545796c8dcSSimon Schubert while (pfrom >= loc)
43555796c8dcSSimon Schubert *pto-- = *pfrom--;
43565796c8dcSSimon Schubert }
43575796c8dcSSimon Schubert #endif /* WCHAR */
43585796c8dcSSimon Schubert
43595796c8dcSSimon Schubert #ifdef WCHAR
43605796c8dcSSimon Schubert static reg_errcode_t
wcs_compile_range(CHAR_T range_start_char,const CHAR_T ** p_ptr,const CHAR_T * pend,RE_TRANSLATE_TYPE translate,reg_syntax_t syntax,CHAR_T * b,CHAR_T * char_set)43615796c8dcSSimon Schubert wcs_compile_range (CHAR_T range_start_char, const CHAR_T **p_ptr,
43625796c8dcSSimon Schubert const CHAR_T *pend, RE_TRANSLATE_TYPE translate,
43635796c8dcSSimon Schubert reg_syntax_t syntax, CHAR_T *b, CHAR_T *char_set)
43645796c8dcSSimon Schubert {
43655796c8dcSSimon Schubert const CHAR_T *p = *p_ptr;
43665796c8dcSSimon Schubert CHAR_T range_start, range_end;
43675796c8dcSSimon Schubert reg_errcode_t ret;
43685796c8dcSSimon Schubert # ifdef _LIBC
43695796c8dcSSimon Schubert uint32_t nrules;
43705796c8dcSSimon Schubert uint32_t start_val, end_val;
43715796c8dcSSimon Schubert # endif
43725796c8dcSSimon Schubert if (p == pend)
43735796c8dcSSimon Schubert return REG_ERANGE;
43745796c8dcSSimon Schubert
43755796c8dcSSimon Schubert # ifdef _LIBC
43765796c8dcSSimon Schubert nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
43775796c8dcSSimon Schubert if (nrules != 0)
43785796c8dcSSimon Schubert {
43795796c8dcSSimon Schubert const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
43805796c8dcSSimon Schubert _NL_COLLATE_COLLSEQWC);
43815796c8dcSSimon Schubert const unsigned char *extra = (const unsigned char *)
43825796c8dcSSimon Schubert _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
43835796c8dcSSimon Schubert
43845796c8dcSSimon Schubert if (range_start_char < -1)
43855796c8dcSSimon Schubert {
43865796c8dcSSimon Schubert /* range_start is a collating symbol. */
43875796c8dcSSimon Schubert int32_t *wextra;
43885796c8dcSSimon Schubert /* Retreive the index and get collation sequence value. */
43895796c8dcSSimon Schubert wextra = (int32_t*)(extra + char_set[-range_start_char]);
43905796c8dcSSimon Schubert start_val = wextra[1 + *wextra];
43915796c8dcSSimon Schubert }
43925796c8dcSSimon Schubert else
43935796c8dcSSimon Schubert start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
43945796c8dcSSimon Schubert
43955796c8dcSSimon Schubert end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
43965796c8dcSSimon Schubert
43975796c8dcSSimon Schubert /* Report an error if the range is empty and the syntax prohibits
43985796c8dcSSimon Schubert this. */
43995796c8dcSSimon Schubert ret = ((syntax & RE_NO_EMPTY_RANGES)
44005796c8dcSSimon Schubert && (start_val > end_val))? REG_ERANGE : REG_NOERROR;
44015796c8dcSSimon Schubert
44025796c8dcSSimon Schubert /* Insert space to the end of the char_ranges. */
44035796c8dcSSimon Schubert insert_space(2, b - char_set[5] - 2, b - 1);
44045796c8dcSSimon Schubert *(b - char_set[5] - 2) = (wchar_t)start_val;
44055796c8dcSSimon Schubert *(b - char_set[5] - 1) = (wchar_t)end_val;
44065796c8dcSSimon Schubert char_set[4]++; /* ranges_index */
44075796c8dcSSimon Schubert }
44085796c8dcSSimon Schubert else
44095796c8dcSSimon Schubert # endif
44105796c8dcSSimon Schubert {
44115796c8dcSSimon Schubert range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
44125796c8dcSSimon Schubert range_start_char;
44135796c8dcSSimon Schubert range_end = TRANSLATE (p[0]);
44145796c8dcSSimon Schubert /* Report an error if the range is empty and the syntax prohibits
44155796c8dcSSimon Schubert this. */
44165796c8dcSSimon Schubert ret = ((syntax & RE_NO_EMPTY_RANGES)
44175796c8dcSSimon Schubert && (range_start > range_end))? REG_ERANGE : REG_NOERROR;
44185796c8dcSSimon Schubert
44195796c8dcSSimon Schubert /* Insert space to the end of the char_ranges. */
44205796c8dcSSimon Schubert insert_space(2, b - char_set[5] - 2, b - 1);
44215796c8dcSSimon Schubert *(b - char_set[5] - 2) = range_start;
44225796c8dcSSimon Schubert *(b - char_set[5] - 1) = range_end;
44235796c8dcSSimon Schubert char_set[4]++; /* ranges_index */
44245796c8dcSSimon Schubert }
44255796c8dcSSimon Schubert /* Have to increment the pointer into the pattern string, so the
44265796c8dcSSimon Schubert caller isn't still at the ending character. */
44275796c8dcSSimon Schubert (*p_ptr)++;
44285796c8dcSSimon Schubert
44295796c8dcSSimon Schubert return ret;
44305796c8dcSSimon Schubert }
44315796c8dcSSimon Schubert #else /* BYTE */
44325796c8dcSSimon Schubert /* Read the ending character of a range (in a bracket expression) from the
44335796c8dcSSimon Schubert uncompiled pattern *P_PTR (which ends at PEND). We assume the
44345796c8dcSSimon Schubert starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
44355796c8dcSSimon Schubert Then we set the translation of all bits between the starting and
44365796c8dcSSimon Schubert ending characters (inclusive) in the compiled pattern B.
44375796c8dcSSimon Schubert
44385796c8dcSSimon Schubert Return an error code.
44395796c8dcSSimon Schubert
44405796c8dcSSimon Schubert We use these short variable names so we can use the same macros as
44415796c8dcSSimon Schubert `regex_compile' itself. */
44425796c8dcSSimon Schubert
44435796c8dcSSimon Schubert static reg_errcode_t
byte_compile_range(unsigned int range_start_char,const char ** p_ptr,const char * pend,RE_TRANSLATE_TYPE translate,reg_syntax_t syntax,unsigned char * b)44445796c8dcSSimon Schubert byte_compile_range (unsigned int range_start_char, const char **p_ptr,
44455796c8dcSSimon Schubert const char *pend, RE_TRANSLATE_TYPE translate,
44465796c8dcSSimon Schubert reg_syntax_t syntax, unsigned char *b)
44475796c8dcSSimon Schubert {
44485796c8dcSSimon Schubert unsigned this_char;
44495796c8dcSSimon Schubert const char *p = *p_ptr;
44505796c8dcSSimon Schubert reg_errcode_t ret;
44515796c8dcSSimon Schubert # if _LIBC
44525796c8dcSSimon Schubert const unsigned char *collseq;
44535796c8dcSSimon Schubert unsigned int start_colseq;
44545796c8dcSSimon Schubert unsigned int end_colseq;
44555796c8dcSSimon Schubert # else
44565796c8dcSSimon Schubert unsigned end_char;
44575796c8dcSSimon Schubert # endif
44585796c8dcSSimon Schubert
44595796c8dcSSimon Schubert if (p == pend)
44605796c8dcSSimon Schubert return REG_ERANGE;
44615796c8dcSSimon Schubert
44625796c8dcSSimon Schubert /* Have to increment the pointer into the pattern string, so the
44635796c8dcSSimon Schubert caller isn't still at the ending character. */
44645796c8dcSSimon Schubert (*p_ptr)++;
44655796c8dcSSimon Schubert
44665796c8dcSSimon Schubert /* Report an error if the range is empty and the syntax prohibits this. */
44675796c8dcSSimon Schubert ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
44685796c8dcSSimon Schubert
44695796c8dcSSimon Schubert # if _LIBC
44705796c8dcSSimon Schubert collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
44715796c8dcSSimon Schubert _NL_COLLATE_COLLSEQMB);
44725796c8dcSSimon Schubert
44735796c8dcSSimon Schubert start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
44745796c8dcSSimon Schubert end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
44755796c8dcSSimon Schubert for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
44765796c8dcSSimon Schubert {
44775796c8dcSSimon Schubert unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
44785796c8dcSSimon Schubert
44795796c8dcSSimon Schubert if (start_colseq <= this_colseq && this_colseq <= end_colseq)
44805796c8dcSSimon Schubert {
44815796c8dcSSimon Schubert SET_LIST_BIT (TRANSLATE (this_char));
44825796c8dcSSimon Schubert ret = REG_NOERROR;
44835796c8dcSSimon Schubert }
44845796c8dcSSimon Schubert }
44855796c8dcSSimon Schubert # else
44865796c8dcSSimon Schubert /* Here we see why `this_char' has to be larger than an `unsigned
44875796c8dcSSimon Schubert char' -- we would otherwise go into an infinite loop, since all
44885796c8dcSSimon Schubert characters <= 0xff. */
44895796c8dcSSimon Schubert range_start_char = TRANSLATE (range_start_char);
44905796c8dcSSimon Schubert /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE,
44915796c8dcSSimon Schubert and some compilers cast it to int implicitly, so following for_loop
44925796c8dcSSimon Schubert may fall to (almost) infinite loop.
44935796c8dcSSimon Schubert e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff.
44945796c8dcSSimon Schubert To avoid this, we cast p[0] to unsigned int and truncate it. */
44955796c8dcSSimon Schubert end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
44965796c8dcSSimon Schubert
44975796c8dcSSimon Schubert for (this_char = range_start_char; this_char <= end_char; ++this_char)
44985796c8dcSSimon Schubert {
44995796c8dcSSimon Schubert SET_LIST_BIT (TRANSLATE (this_char));
45005796c8dcSSimon Schubert ret = REG_NOERROR;
45015796c8dcSSimon Schubert }
45025796c8dcSSimon Schubert # endif
45035796c8dcSSimon Schubert
45045796c8dcSSimon Schubert return ret;
45055796c8dcSSimon Schubert }
45065796c8dcSSimon Schubert #endif /* WCHAR */
45075796c8dcSSimon Schubert
45085796c8dcSSimon Schubert /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
45095796c8dcSSimon Schubert BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
45105796c8dcSSimon Schubert characters can start a string that matches the pattern. This fastmap
45115796c8dcSSimon Schubert is used by re_search to skip quickly over impossible starting points.
45125796c8dcSSimon Schubert
45135796c8dcSSimon Schubert The caller must supply the address of a (1 << BYTEWIDTH)-byte data
45145796c8dcSSimon Schubert area as BUFP->fastmap.
45155796c8dcSSimon Schubert
45165796c8dcSSimon Schubert We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
45175796c8dcSSimon Schubert the pattern buffer.
45185796c8dcSSimon Schubert
45195796c8dcSSimon Schubert Returns 0 if we succeed, -2 if an internal error. */
45205796c8dcSSimon Schubert
45215796c8dcSSimon Schubert #ifdef WCHAR
45225796c8dcSSimon Schubert /* local function for re_compile_fastmap.
45235796c8dcSSimon Schubert truncate wchar_t character to char. */
45245796c8dcSSimon Schubert static unsigned char truncate_wchar (CHAR_T c);
45255796c8dcSSimon Schubert
45265796c8dcSSimon Schubert static unsigned char
truncate_wchar(CHAR_T c)45275796c8dcSSimon Schubert truncate_wchar (CHAR_T c)
45285796c8dcSSimon Schubert {
45295796c8dcSSimon Schubert unsigned char buf[MB_CUR_MAX];
45305796c8dcSSimon Schubert mbstate_t state;
45315796c8dcSSimon Schubert int retval;
45325796c8dcSSimon Schubert memset (&state, '\0', sizeof (state));
45335796c8dcSSimon Schubert # ifdef _LIBC
45345796c8dcSSimon Schubert retval = __wcrtomb (buf, c, &state);
45355796c8dcSSimon Schubert # else
45365796c8dcSSimon Schubert retval = wcrtomb (buf, c, &state);
45375796c8dcSSimon Schubert # endif
45385796c8dcSSimon Schubert return retval > 0 ? buf[0] : (unsigned char) c;
45395796c8dcSSimon Schubert }
45405796c8dcSSimon Schubert #endif /* WCHAR */
45415796c8dcSSimon Schubert
45425796c8dcSSimon Schubert static int
PREFIX(re_compile_fastmap)45435796c8dcSSimon Schubert PREFIX(re_compile_fastmap) (struct re_pattern_buffer *bufp)
45445796c8dcSSimon Schubert {
45455796c8dcSSimon Schubert int j, k;
45465796c8dcSSimon Schubert #ifdef MATCH_MAY_ALLOCATE
45475796c8dcSSimon Schubert PREFIX(fail_stack_type) fail_stack;
45485796c8dcSSimon Schubert #endif
45495796c8dcSSimon Schubert #ifndef REGEX_MALLOC
45505796c8dcSSimon Schubert char *destination;
45515796c8dcSSimon Schubert #endif
45525796c8dcSSimon Schubert
45535796c8dcSSimon Schubert register char *fastmap = bufp->fastmap;
45545796c8dcSSimon Schubert
45555796c8dcSSimon Schubert #ifdef WCHAR
45565796c8dcSSimon Schubert /* We need to cast pattern to (wchar_t*), because we casted this compiled
45575796c8dcSSimon Schubert pattern to (char*) in regex_compile. */
45585796c8dcSSimon Schubert UCHAR_T *pattern = (UCHAR_T*)bufp->buffer;
45595796c8dcSSimon Schubert register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used);
45605796c8dcSSimon Schubert #else /* BYTE */
45615796c8dcSSimon Schubert UCHAR_T *pattern = bufp->buffer;
45625796c8dcSSimon Schubert register UCHAR_T *pend = pattern + bufp->used;
45635796c8dcSSimon Schubert #endif /* WCHAR */
45645796c8dcSSimon Schubert UCHAR_T *p = pattern;
45655796c8dcSSimon Schubert
45665796c8dcSSimon Schubert #ifdef REL_ALLOC
45675796c8dcSSimon Schubert /* This holds the pointer to the failure stack, when
45685796c8dcSSimon Schubert it is allocated relocatably. */
45695796c8dcSSimon Schubert fail_stack_elt_t *failure_stack_ptr;
45705796c8dcSSimon Schubert #endif
45715796c8dcSSimon Schubert
45725796c8dcSSimon Schubert /* Assume that each path through the pattern can be null until
45735796c8dcSSimon Schubert proven otherwise. We set this false at the bottom of switch
45745796c8dcSSimon Schubert statement, to which we get only if a particular path doesn't
45755796c8dcSSimon Schubert match the empty string. */
45765796c8dcSSimon Schubert boolean path_can_be_null = true;
45775796c8dcSSimon Schubert
45785796c8dcSSimon Schubert /* We aren't doing a `succeed_n' to begin with. */
45795796c8dcSSimon Schubert boolean succeed_n_p = false;
45805796c8dcSSimon Schubert
45815796c8dcSSimon Schubert assert (fastmap != NULL && p != NULL);
45825796c8dcSSimon Schubert
45835796c8dcSSimon Schubert INIT_FAIL_STACK ();
45845796c8dcSSimon Schubert bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
45855796c8dcSSimon Schubert bufp->fastmap_accurate = 1; /* It will be when we're done. */
45865796c8dcSSimon Schubert bufp->can_be_null = 0;
45875796c8dcSSimon Schubert
45885796c8dcSSimon Schubert while (1)
45895796c8dcSSimon Schubert {
45905796c8dcSSimon Schubert if (p == pend || *p == (UCHAR_T) succeed)
45915796c8dcSSimon Schubert {
45925796c8dcSSimon Schubert /* We have reached the (effective) end of pattern. */
45935796c8dcSSimon Schubert if (!FAIL_STACK_EMPTY ())
45945796c8dcSSimon Schubert {
45955796c8dcSSimon Schubert bufp->can_be_null |= path_can_be_null;
45965796c8dcSSimon Schubert
45975796c8dcSSimon Schubert /* Reset for next path. */
45985796c8dcSSimon Schubert path_can_be_null = true;
45995796c8dcSSimon Schubert
46005796c8dcSSimon Schubert p = fail_stack.stack[--fail_stack.avail].pointer;
46015796c8dcSSimon Schubert
46025796c8dcSSimon Schubert continue;
46035796c8dcSSimon Schubert }
46045796c8dcSSimon Schubert else
46055796c8dcSSimon Schubert break;
46065796c8dcSSimon Schubert }
46075796c8dcSSimon Schubert
46085796c8dcSSimon Schubert /* We should never be about to go beyond the end of the pattern. */
46095796c8dcSSimon Schubert assert (p < pend);
46105796c8dcSSimon Schubert
46115796c8dcSSimon Schubert switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
46125796c8dcSSimon Schubert {
46135796c8dcSSimon Schubert
46145796c8dcSSimon Schubert /* I guess the idea here is to simply not bother with a fastmap
46155796c8dcSSimon Schubert if a backreference is used, since it's too hard to figure out
46165796c8dcSSimon Schubert the fastmap for the corresponding group. Setting
46175796c8dcSSimon Schubert `can_be_null' stops `re_search_2' from using the fastmap, so
46185796c8dcSSimon Schubert that is all we do. */
46195796c8dcSSimon Schubert case duplicate:
46205796c8dcSSimon Schubert bufp->can_be_null = 1;
46215796c8dcSSimon Schubert goto done;
46225796c8dcSSimon Schubert
46235796c8dcSSimon Schubert
46245796c8dcSSimon Schubert /* Following are the cases which match a character. These end
46255796c8dcSSimon Schubert with `break'. */
46265796c8dcSSimon Schubert
46275796c8dcSSimon Schubert #ifdef WCHAR
46285796c8dcSSimon Schubert case exactn:
46295796c8dcSSimon Schubert fastmap[truncate_wchar(p[1])] = 1;
46305796c8dcSSimon Schubert break;
46315796c8dcSSimon Schubert #else /* BYTE */
46325796c8dcSSimon Schubert case exactn:
46335796c8dcSSimon Schubert fastmap[p[1]] = 1;
46345796c8dcSSimon Schubert break;
46355796c8dcSSimon Schubert #endif /* WCHAR */
46365796c8dcSSimon Schubert #ifdef MBS_SUPPORT
46375796c8dcSSimon Schubert case exactn_bin:
46385796c8dcSSimon Schubert fastmap[p[1]] = 1;
46395796c8dcSSimon Schubert break;
46405796c8dcSSimon Schubert #endif
46415796c8dcSSimon Schubert
46425796c8dcSSimon Schubert #ifdef WCHAR
46435796c8dcSSimon Schubert /* It is hard to distinguish fastmap from (multi byte) characters
46445796c8dcSSimon Schubert which depends on current locale. */
46455796c8dcSSimon Schubert case charset:
46465796c8dcSSimon Schubert case charset_not:
46475796c8dcSSimon Schubert case wordchar:
46485796c8dcSSimon Schubert case notwordchar:
46495796c8dcSSimon Schubert bufp->can_be_null = 1;
46505796c8dcSSimon Schubert goto done;
46515796c8dcSSimon Schubert #else /* BYTE */
46525796c8dcSSimon Schubert case charset:
46535796c8dcSSimon Schubert for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
46545796c8dcSSimon Schubert if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
46555796c8dcSSimon Schubert fastmap[j] = 1;
46565796c8dcSSimon Schubert break;
46575796c8dcSSimon Schubert
46585796c8dcSSimon Schubert
46595796c8dcSSimon Schubert case charset_not:
46605796c8dcSSimon Schubert /* Chars beyond end of map must be allowed. */
46615796c8dcSSimon Schubert for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
46625796c8dcSSimon Schubert fastmap[j] = 1;
46635796c8dcSSimon Schubert
46645796c8dcSSimon Schubert for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
46655796c8dcSSimon Schubert if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
46665796c8dcSSimon Schubert fastmap[j] = 1;
46675796c8dcSSimon Schubert break;
46685796c8dcSSimon Schubert
46695796c8dcSSimon Schubert
46705796c8dcSSimon Schubert case wordchar:
46715796c8dcSSimon Schubert for (j = 0; j < (1 << BYTEWIDTH); j++)
46725796c8dcSSimon Schubert if (SYNTAX (j) == Sword)
46735796c8dcSSimon Schubert fastmap[j] = 1;
46745796c8dcSSimon Schubert break;
46755796c8dcSSimon Schubert
46765796c8dcSSimon Schubert
46775796c8dcSSimon Schubert case notwordchar:
46785796c8dcSSimon Schubert for (j = 0; j < (1 << BYTEWIDTH); j++)
46795796c8dcSSimon Schubert if (SYNTAX (j) != Sword)
46805796c8dcSSimon Schubert fastmap[j] = 1;
46815796c8dcSSimon Schubert break;
46825796c8dcSSimon Schubert #endif /* WCHAR */
46835796c8dcSSimon Schubert
46845796c8dcSSimon Schubert case anychar:
46855796c8dcSSimon Schubert {
46865796c8dcSSimon Schubert int fastmap_newline = fastmap['\n'];
46875796c8dcSSimon Schubert
46885796c8dcSSimon Schubert /* `.' matches anything ... */
46895796c8dcSSimon Schubert for (j = 0; j < (1 << BYTEWIDTH); j++)
46905796c8dcSSimon Schubert fastmap[j] = 1;
46915796c8dcSSimon Schubert
46925796c8dcSSimon Schubert /* ... except perhaps newline. */
46935796c8dcSSimon Schubert if (!(bufp->syntax & RE_DOT_NEWLINE))
46945796c8dcSSimon Schubert fastmap['\n'] = fastmap_newline;
46955796c8dcSSimon Schubert
46965796c8dcSSimon Schubert /* Return if we have already set `can_be_null'; if we have,
46975796c8dcSSimon Schubert then the fastmap is irrelevant. Something's wrong here. */
46985796c8dcSSimon Schubert else if (bufp->can_be_null)
46995796c8dcSSimon Schubert goto done;
47005796c8dcSSimon Schubert
47015796c8dcSSimon Schubert /* Otherwise, have to check alternative paths. */
47025796c8dcSSimon Schubert break;
47035796c8dcSSimon Schubert }
47045796c8dcSSimon Schubert
47055796c8dcSSimon Schubert #ifdef emacs
47065796c8dcSSimon Schubert case syntaxspec:
47075796c8dcSSimon Schubert k = *p++;
47085796c8dcSSimon Schubert for (j = 0; j < (1 << BYTEWIDTH); j++)
47095796c8dcSSimon Schubert if (SYNTAX (j) == (enum syntaxcode) k)
47105796c8dcSSimon Schubert fastmap[j] = 1;
47115796c8dcSSimon Schubert break;
47125796c8dcSSimon Schubert
47135796c8dcSSimon Schubert
47145796c8dcSSimon Schubert case notsyntaxspec:
47155796c8dcSSimon Schubert k = *p++;
47165796c8dcSSimon Schubert for (j = 0; j < (1 << BYTEWIDTH); j++)
47175796c8dcSSimon Schubert if (SYNTAX (j) != (enum syntaxcode) k)
47185796c8dcSSimon Schubert fastmap[j] = 1;
47195796c8dcSSimon Schubert break;
47205796c8dcSSimon Schubert
47215796c8dcSSimon Schubert
47225796c8dcSSimon Schubert /* All cases after this match the empty string. These end with
47235796c8dcSSimon Schubert `continue'. */
47245796c8dcSSimon Schubert
47255796c8dcSSimon Schubert
47265796c8dcSSimon Schubert case before_dot:
47275796c8dcSSimon Schubert case at_dot:
47285796c8dcSSimon Schubert case after_dot:
47295796c8dcSSimon Schubert continue;
47305796c8dcSSimon Schubert #endif /* emacs */
47315796c8dcSSimon Schubert
47325796c8dcSSimon Schubert
47335796c8dcSSimon Schubert case no_op:
47345796c8dcSSimon Schubert case begline:
47355796c8dcSSimon Schubert case endline:
47365796c8dcSSimon Schubert case begbuf:
47375796c8dcSSimon Schubert case endbuf:
47385796c8dcSSimon Schubert case wordbound:
47395796c8dcSSimon Schubert case notwordbound:
47405796c8dcSSimon Schubert case wordbeg:
47415796c8dcSSimon Schubert case wordend:
47425796c8dcSSimon Schubert case push_dummy_failure:
47435796c8dcSSimon Schubert continue;
47445796c8dcSSimon Schubert
47455796c8dcSSimon Schubert
47465796c8dcSSimon Schubert case jump_n:
47475796c8dcSSimon Schubert case pop_failure_jump:
47485796c8dcSSimon Schubert case maybe_pop_jump:
47495796c8dcSSimon Schubert case jump:
47505796c8dcSSimon Schubert case jump_past_alt:
47515796c8dcSSimon Schubert case dummy_failure_jump:
47525796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (j, p);
47535796c8dcSSimon Schubert p += j;
47545796c8dcSSimon Schubert if (j > 0)
47555796c8dcSSimon Schubert continue;
47565796c8dcSSimon Schubert
47575796c8dcSSimon Schubert /* Jump backward implies we just went through the body of a
47585796c8dcSSimon Schubert loop and matched nothing. Opcode jumped to should be
47595796c8dcSSimon Schubert `on_failure_jump' or `succeed_n'. Just treat it like an
47605796c8dcSSimon Schubert ordinary jump. For a * loop, it has pushed its failure
47615796c8dcSSimon Schubert point already; if so, discard that as redundant. */
47625796c8dcSSimon Schubert if ((re_opcode_t) *p != on_failure_jump
47635796c8dcSSimon Schubert && (re_opcode_t) *p != succeed_n)
47645796c8dcSSimon Schubert continue;
47655796c8dcSSimon Schubert
47665796c8dcSSimon Schubert p++;
47675796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (j, p);
47685796c8dcSSimon Schubert p += j;
47695796c8dcSSimon Schubert
47705796c8dcSSimon Schubert /* If what's on the stack is where we are now, pop it. */
47715796c8dcSSimon Schubert if (!FAIL_STACK_EMPTY ()
47725796c8dcSSimon Schubert && fail_stack.stack[fail_stack.avail - 1].pointer == p)
47735796c8dcSSimon Schubert fail_stack.avail--;
47745796c8dcSSimon Schubert
47755796c8dcSSimon Schubert continue;
47765796c8dcSSimon Schubert
47775796c8dcSSimon Schubert
47785796c8dcSSimon Schubert case on_failure_jump:
47795796c8dcSSimon Schubert case on_failure_keep_string_jump:
47805796c8dcSSimon Schubert handle_on_failure_jump:
47815796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (j, p);
47825796c8dcSSimon Schubert
47835796c8dcSSimon Schubert /* For some patterns, e.g., `(a?)?', `p+j' here points to the
47845796c8dcSSimon Schubert end of the pattern. We don't want to push such a point,
47855796c8dcSSimon Schubert since when we restore it above, entering the switch will
47865796c8dcSSimon Schubert increment `p' past the end of the pattern. We don't need
47875796c8dcSSimon Schubert to push such a point since we obviously won't find any more
47885796c8dcSSimon Schubert fastmap entries beyond `pend'. Such a pattern can match
47895796c8dcSSimon Schubert the null string, though. */
47905796c8dcSSimon Schubert if (p + j < pend)
47915796c8dcSSimon Schubert {
47925796c8dcSSimon Schubert if (!PUSH_PATTERN_OP (p + j, fail_stack))
47935796c8dcSSimon Schubert {
47945796c8dcSSimon Schubert RESET_FAIL_STACK ();
47955796c8dcSSimon Schubert return -2;
47965796c8dcSSimon Schubert }
47975796c8dcSSimon Schubert }
47985796c8dcSSimon Schubert else
47995796c8dcSSimon Schubert bufp->can_be_null = 1;
48005796c8dcSSimon Schubert
48015796c8dcSSimon Schubert if (succeed_n_p)
48025796c8dcSSimon Schubert {
48035796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
48045796c8dcSSimon Schubert succeed_n_p = false;
48055796c8dcSSimon Schubert }
48065796c8dcSSimon Schubert
48075796c8dcSSimon Schubert continue;
48085796c8dcSSimon Schubert
48095796c8dcSSimon Schubert
48105796c8dcSSimon Schubert case succeed_n:
48115796c8dcSSimon Schubert /* Get to the number of times to succeed. */
48125796c8dcSSimon Schubert p += OFFSET_ADDRESS_SIZE;
48135796c8dcSSimon Schubert
48145796c8dcSSimon Schubert /* Increment p past the n for when k != 0. */
48155796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (k, p);
48165796c8dcSSimon Schubert if (k == 0)
48175796c8dcSSimon Schubert {
48185796c8dcSSimon Schubert p -= 2 * OFFSET_ADDRESS_SIZE;
48195796c8dcSSimon Schubert succeed_n_p = true; /* Spaghetti code alert. */
48205796c8dcSSimon Schubert goto handle_on_failure_jump;
48215796c8dcSSimon Schubert }
48225796c8dcSSimon Schubert continue;
48235796c8dcSSimon Schubert
48245796c8dcSSimon Schubert
48255796c8dcSSimon Schubert case set_number_at:
48265796c8dcSSimon Schubert p += 2 * OFFSET_ADDRESS_SIZE;
48275796c8dcSSimon Schubert continue;
48285796c8dcSSimon Schubert
48295796c8dcSSimon Schubert
48305796c8dcSSimon Schubert case start_memory:
48315796c8dcSSimon Schubert case stop_memory:
48325796c8dcSSimon Schubert p += 2;
48335796c8dcSSimon Schubert continue;
48345796c8dcSSimon Schubert
48355796c8dcSSimon Schubert
48365796c8dcSSimon Schubert default:
48375796c8dcSSimon Schubert abort (); /* We have listed all the cases. */
48385796c8dcSSimon Schubert } /* switch *p++ */
48395796c8dcSSimon Schubert
48405796c8dcSSimon Schubert /* Getting here means we have found the possible starting
48415796c8dcSSimon Schubert characters for one path of the pattern -- and that the empty
48425796c8dcSSimon Schubert string does not match. We need not follow this path further.
48435796c8dcSSimon Schubert Instead, look at the next alternative (remembered on the
48445796c8dcSSimon Schubert stack), or quit if no more. The test at the top of the loop
48455796c8dcSSimon Schubert does these things. */
48465796c8dcSSimon Schubert path_can_be_null = false;
48475796c8dcSSimon Schubert p = pend;
48485796c8dcSSimon Schubert } /* while p */
48495796c8dcSSimon Schubert
48505796c8dcSSimon Schubert /* Set `can_be_null' for the last path (also the first path, if the
48515796c8dcSSimon Schubert pattern is empty). */
48525796c8dcSSimon Schubert bufp->can_be_null |= path_can_be_null;
48535796c8dcSSimon Schubert
48545796c8dcSSimon Schubert done:
48555796c8dcSSimon Schubert RESET_FAIL_STACK ();
48565796c8dcSSimon Schubert return 0;
48575796c8dcSSimon Schubert }
48585796c8dcSSimon Schubert
48595796c8dcSSimon Schubert #else /* not INSIDE_RECURSION */
48605796c8dcSSimon Schubert
/* Public entry point for fastmap construction on the compiled pattern
   BUFP.  When multibyte support is compiled in and the current locale
   uses more than one byte per character, the wide-character builder is
   used; in every other case the single-byte builder is used.  The
   value produced by the selected builder is returned unchanged.  */

int
re_compile_fastmap (struct re_pattern_buffer *bufp)
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_compile_fastmap (bufp);
# endif
  return byte_re_compile_fastmap (bufp);
}
#ifdef _LIBC
weak_alias (__re_compile_fastmap, re_compile_fastmap)
#endif
48745796c8dcSSimon Schubert
48755796c8dcSSimon Schubert
48765796c8dcSSimon Schubert /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
48775796c8dcSSimon Schubert ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
48785796c8dcSSimon Schubert this memory for recording register information. STARTS and ENDS
48795796c8dcSSimon Schubert must be allocated using the malloc library routine, and must each
48805796c8dcSSimon Schubert be at least NUM_REGS * sizeof (regoff_t) bytes long.
48815796c8dcSSimon Schubert
48825796c8dcSSimon Schubert If NUM_REGS == 0, then subsequent matches should allocate their own
48835796c8dcSSimon Schubert register data.
48845796c8dcSSimon Schubert
48855796c8dcSSimon Schubert Unless this function is called, the first search or match using
48865796c8dcSSimon Schubert PATTERN_BUFFER will allocate its own register data, without
48875796c8dcSSimon Schubert freeing the old data. */
48885796c8dcSSimon Schubert
48895796c8dcSSimon Schubert void
48905796c8dcSSimon Schubert re_set_registers (struct re_pattern_buffer *bufp,
48915796c8dcSSimon Schubert struct re_registers *regs, unsigned num_regs,
48925796c8dcSSimon Schubert regoff_t *starts, regoff_t *ends)
48935796c8dcSSimon Schubert {
48945796c8dcSSimon Schubert if (num_regs)
48955796c8dcSSimon Schubert {
48965796c8dcSSimon Schubert bufp->regs_allocated = REGS_REALLOCATE;
48975796c8dcSSimon Schubert regs->num_regs = num_regs;
48985796c8dcSSimon Schubert regs->start = starts;
48995796c8dcSSimon Schubert regs->end = ends;
49005796c8dcSSimon Schubert }
49015796c8dcSSimon Schubert else
49025796c8dcSSimon Schubert {
49035796c8dcSSimon Schubert bufp->regs_allocated = REGS_UNALLOCATED;
49045796c8dcSSimon Schubert regs->num_regs = 0;
49055796c8dcSSimon Schubert regs->start = regs->end = (regoff_t *) 0;
49065796c8dcSSimon Schubert }
49075796c8dcSSimon Schubert }
49085796c8dcSSimon Schubert #ifdef _LIBC
weak_alias(__re_set_registers,re_set_registers)49095796c8dcSSimon Schubert weak_alias (__re_set_registers, re_set_registers)
49105796c8dcSSimon Schubert #endif
49115796c8dcSSimon Schubert
49125796c8dcSSimon Schubert /* Searching routines. */
49135796c8dcSSimon Schubert
/* Single-string convenience form of re_search_2.  STRING (SIZE bytes)
   is passed as the second string with an empty first string, and the
   stop position is fixed at SIZE, so the caller cannot choose where
   matching stops.  Returns whatever re_search_2 returns.  */

int
re_search (struct re_pattern_buffer *bufp, const char *string, int size,
           int startpos, int range, struct re_registers *regs)
{
  const int stop = size;

  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
                      regs, stop);
}
#ifdef _LIBC
weak_alias (__re_search, re_search)
#endif
49275796c8dcSSimon Schubert
49285796c8dcSSimon Schubert
/* Search the virtual concatenation of STRING1 (SIZE1 bytes) and STRING2
   (SIZE2 bytes) for the compiled pattern in BUFP->buffer.  Matching is
   attempted first at index STARTPOS, then at STARTPOS + 1, and so on.

   RANGE says how far to scan: RANGE = 0 tries only STARTPOS, and in
   general the last start position tried is STARTPOS + RANGE.

   REGS receives the indices, within the virtual concatenation, of the
   text matched by the whole pattern and by its subexpressions.

   No match is considered past index STOP of the virtual concatenation.

   The result is the position at which the match was found, -1 if there
   is no match, or -2 on error (such as failure stack overflow).

   This wrapper only selects the implementation: the wide-character one
   when multibyte support is compiled in and the locale is multibyte,
   the single-byte one otherwise.  */

int
re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
             const char *string2, int size2, int startpos, int range,
             struct re_registers *regs, int stop)
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_search_2 (bufp, string1, size1, string2, size2,
                            startpos, range, regs, stop);
# endif
  return byte_re_search_2 (bufp, string1, size1, string2, size2,
                           startpos, range, regs, stop);
}
#ifdef _LIBC
weak_alias (__re_search_2, re_search_2)
#endif
49675796c8dcSSimon Schubert
49685796c8dcSSimon Schubert #endif /* not INSIDE_RECURSION */
49695796c8dcSSimon Schubert
49705796c8dcSSimon Schubert #ifdef INSIDE_RECURSION
49715796c8dcSSimon Schubert
/* Release VAR and reset it to NULL.  VAR must be a modifiable pointer
   lvalue; it is evaluated more than once.

   The expansion is wrapped in do { ... } while (0) so that the macro
   behaves as exactly one statement.  Without the wrapper, writing
   `if (cond) FREE_VAR (p);' would null P even when COND is false, and
   `if (cond) FREE_VAR (p); else ...' would not compile.  Always follow
   an invocation with a semicolon.  */
#ifdef MATCH_MAY_ALLOCATE
# define FREE_VAR(var)							\
  do {									\
    if (var)								\
      REGEX_FREE (var);							\
    var = NULL;								\
  } while (0)
#else
# define FREE_VAR(var)							\
  do {									\
    free (var);								\
    var = NULL;								\
  } while (0)
#endif
49775796c8dcSSimon Schubert
#ifdef WCHAR
/* Source strings longer than this many bytes get their conversion
   buffers from the heap allocator rather than from REGEX_TALLOC.  */
# define MAX_ALLOCA_SIZE	2000

/* Dispose of the wide-character copies and offset tables of both input
   strings.  For each string the release method mirrors the allocation
   choice made from its byte length: short strings go through FREE_VAR,
   long ones through free.  Uses the local variables size1, size2,
   wcs_string1, wcs_string2, mbs_offset1 and mbs_offset2 of the
   enclosing function.  */
# define FREE_WCS_BUFFERS()						\
  do {									\
    if (size1 <= MAX_ALLOCA_SIZE)					\
      {									\
        FREE_VAR (wcs_string1);						\
        FREE_VAR (mbs_offset1);						\
      }									\
    else								\
      {									\
        free (wcs_string1);						\
        free (mbs_offset1);						\
      }									\
    if (size2 <= MAX_ALLOCA_SIZE)					\
      {									\
        FREE_VAR (wcs_string2);						\
        FREE_VAR (mbs_offset2);						\
      }									\
    else								\
      {									\
        free (wcs_string2);						\
        free (mbs_offset2);						\
      }									\
  } while (0)

#endif
50065796c8dcSSimon Schubert
50075796c8dcSSimon Schubert
50085796c8dcSSimon Schubert static int
50095796c8dcSSimon Schubert PREFIX(re_search_2) (struct re_pattern_buffer *bufp, const char *string1,
50105796c8dcSSimon Schubert int size1, const char *string2, int size2,
50115796c8dcSSimon Schubert int startpos, int range,
50125796c8dcSSimon Schubert struct re_registers *regs, int stop)
50135796c8dcSSimon Schubert {
50145796c8dcSSimon Schubert int val;
50155796c8dcSSimon Schubert register char *fastmap = bufp->fastmap;
50165796c8dcSSimon Schubert register RE_TRANSLATE_TYPE translate = bufp->translate;
50175796c8dcSSimon Schubert int total_size = size1 + size2;
50185796c8dcSSimon Schubert int endpos = startpos + range;
50195796c8dcSSimon Schubert #ifdef WCHAR
50205796c8dcSSimon Schubert /* We need wchar_t* buffers correspond to cstring1, cstring2. */
50215796c8dcSSimon Schubert wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
50225796c8dcSSimon Schubert /* We need the size of wchar_t buffers correspond to csize1, csize2. */
50235796c8dcSSimon Schubert int wcs_size1 = 0, wcs_size2 = 0;
50245796c8dcSSimon Schubert /* offset buffer for optimizatoin. See convert_mbs_to_wc. */
50255796c8dcSSimon Schubert int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
50265796c8dcSSimon Schubert /* They hold whether each wchar_t is binary data or not. */
50275796c8dcSSimon Schubert char *is_binary = NULL;
50285796c8dcSSimon Schubert #endif /* WCHAR */
50295796c8dcSSimon Schubert
50305796c8dcSSimon Schubert /* Check for out-of-range STARTPOS. */
50315796c8dcSSimon Schubert if (startpos < 0 || startpos > total_size)
50325796c8dcSSimon Schubert return -1;
50335796c8dcSSimon Schubert
50345796c8dcSSimon Schubert /* Fix up RANGE if it might eventually take us outside
50355796c8dcSSimon Schubert the virtual concatenation of STRING1 and STRING2.
50365796c8dcSSimon Schubert Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */
50375796c8dcSSimon Schubert if (endpos < 0)
50385796c8dcSSimon Schubert range = 0 - startpos;
50395796c8dcSSimon Schubert else if (endpos > total_size)
50405796c8dcSSimon Schubert range = total_size - startpos;
50415796c8dcSSimon Schubert
50425796c8dcSSimon Schubert /* If the search isn't to be a backwards one, don't waste time in a
50435796c8dcSSimon Schubert search for a pattern that must be anchored. */
50445796c8dcSSimon Schubert if (bufp->used > 0 && range > 0
50455796c8dcSSimon Schubert && ((re_opcode_t) bufp->buffer[0] == begbuf
50465796c8dcSSimon Schubert /* `begline' is like `begbuf' if it cannot match at newlines. */
50475796c8dcSSimon Schubert || ((re_opcode_t) bufp->buffer[0] == begline
50485796c8dcSSimon Schubert && !bufp->newline_anchor)))
50495796c8dcSSimon Schubert {
50505796c8dcSSimon Schubert if (startpos > 0)
50515796c8dcSSimon Schubert return -1;
50525796c8dcSSimon Schubert else
50535796c8dcSSimon Schubert range = 1;
50545796c8dcSSimon Schubert }
50555796c8dcSSimon Schubert
50565796c8dcSSimon Schubert #ifdef emacs
50575796c8dcSSimon Schubert /* In a forward search for something that starts with \=.
50585796c8dcSSimon Schubert don't keep searching past point. */
50595796c8dcSSimon Schubert if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
50605796c8dcSSimon Schubert {
50615796c8dcSSimon Schubert range = PT - startpos;
50625796c8dcSSimon Schubert if (range <= 0)
50635796c8dcSSimon Schubert return -1;
50645796c8dcSSimon Schubert }
50655796c8dcSSimon Schubert #endif /* emacs */
50665796c8dcSSimon Schubert
50675796c8dcSSimon Schubert /* Update the fastmap now if not correct already. */
50685796c8dcSSimon Schubert if (fastmap && !bufp->fastmap_accurate)
50695796c8dcSSimon Schubert if (re_compile_fastmap (bufp) == -2)
50705796c8dcSSimon Schubert return -2;
50715796c8dcSSimon Schubert
50725796c8dcSSimon Schubert #ifdef WCHAR
50735796c8dcSSimon Schubert /* Allocate wchar_t array for wcs_string1 and wcs_string2 and
50745796c8dcSSimon Schubert fill them with converted string. */
50755796c8dcSSimon Schubert if (size1 != 0)
50765796c8dcSSimon Schubert {
50775796c8dcSSimon Schubert if (size1 > MAX_ALLOCA_SIZE)
50785796c8dcSSimon Schubert {
50795796c8dcSSimon Schubert wcs_string1 = TALLOC (size1 + 1, CHAR_T);
50805796c8dcSSimon Schubert mbs_offset1 = TALLOC (size1 + 1, int);
50815796c8dcSSimon Schubert is_binary = TALLOC (size1 + 1, char);
50825796c8dcSSimon Schubert }
50835796c8dcSSimon Schubert else
50845796c8dcSSimon Schubert {
50855796c8dcSSimon Schubert wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
50865796c8dcSSimon Schubert mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
50875796c8dcSSimon Schubert is_binary = REGEX_TALLOC (size1 + 1, char);
50885796c8dcSSimon Schubert }
50895796c8dcSSimon Schubert if (!wcs_string1 || !mbs_offset1 || !is_binary)
50905796c8dcSSimon Schubert {
50915796c8dcSSimon Schubert if (size1 > MAX_ALLOCA_SIZE)
50925796c8dcSSimon Schubert {
50935796c8dcSSimon Schubert free (wcs_string1);
50945796c8dcSSimon Schubert free (mbs_offset1);
50955796c8dcSSimon Schubert free (is_binary);
50965796c8dcSSimon Schubert }
50975796c8dcSSimon Schubert else
50985796c8dcSSimon Schubert {
50995796c8dcSSimon Schubert FREE_VAR (wcs_string1);
51005796c8dcSSimon Schubert FREE_VAR (mbs_offset1);
51015796c8dcSSimon Schubert FREE_VAR (is_binary);
51025796c8dcSSimon Schubert }
51035796c8dcSSimon Schubert return -2;
51045796c8dcSSimon Schubert }
51055796c8dcSSimon Schubert wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
51065796c8dcSSimon Schubert mbs_offset1, is_binary);
51075796c8dcSSimon Schubert wcs_string1[wcs_size1] = L'\0'; /* for a sentinel */
51085796c8dcSSimon Schubert if (size1 > MAX_ALLOCA_SIZE)
51095796c8dcSSimon Schubert free (is_binary);
51105796c8dcSSimon Schubert else
51115796c8dcSSimon Schubert FREE_VAR (is_binary);
51125796c8dcSSimon Schubert }
51135796c8dcSSimon Schubert if (size2 != 0)
51145796c8dcSSimon Schubert {
51155796c8dcSSimon Schubert if (size2 > MAX_ALLOCA_SIZE)
51165796c8dcSSimon Schubert {
51175796c8dcSSimon Schubert wcs_string2 = TALLOC (size2 + 1, CHAR_T);
51185796c8dcSSimon Schubert mbs_offset2 = TALLOC (size2 + 1, int);
51195796c8dcSSimon Schubert is_binary = TALLOC (size2 + 1, char);
51205796c8dcSSimon Schubert }
51215796c8dcSSimon Schubert else
51225796c8dcSSimon Schubert {
51235796c8dcSSimon Schubert wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
51245796c8dcSSimon Schubert mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
51255796c8dcSSimon Schubert is_binary = REGEX_TALLOC (size2 + 1, char);
51265796c8dcSSimon Schubert }
51275796c8dcSSimon Schubert if (!wcs_string2 || !mbs_offset2 || !is_binary)
51285796c8dcSSimon Schubert {
51295796c8dcSSimon Schubert FREE_WCS_BUFFERS ();
51305796c8dcSSimon Schubert if (size2 > MAX_ALLOCA_SIZE)
51315796c8dcSSimon Schubert free (is_binary);
51325796c8dcSSimon Schubert else
51335796c8dcSSimon Schubert FREE_VAR (is_binary);
51345796c8dcSSimon Schubert return -2;
51355796c8dcSSimon Schubert }
51365796c8dcSSimon Schubert wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
51375796c8dcSSimon Schubert mbs_offset2, is_binary);
51385796c8dcSSimon Schubert wcs_string2[wcs_size2] = L'\0'; /* for a sentinel */
51395796c8dcSSimon Schubert if (size2 > MAX_ALLOCA_SIZE)
51405796c8dcSSimon Schubert free (is_binary);
51415796c8dcSSimon Schubert else
51425796c8dcSSimon Schubert FREE_VAR (is_binary);
51435796c8dcSSimon Schubert }
51445796c8dcSSimon Schubert #endif /* WCHAR */
51455796c8dcSSimon Schubert
51465796c8dcSSimon Schubert
51475796c8dcSSimon Schubert /* Loop through the string, looking for a place to start matching. */
51485796c8dcSSimon Schubert for (;;)
51495796c8dcSSimon Schubert {
51505796c8dcSSimon Schubert /* If a fastmap is supplied, skip quickly over characters that
51515796c8dcSSimon Schubert cannot be the start of a match. If the pattern can match the
51525796c8dcSSimon Schubert null string, however, we don't need to skip characters; we want
51535796c8dcSSimon Schubert the first null string. */
51545796c8dcSSimon Schubert if (fastmap && startpos < total_size && !bufp->can_be_null)
51555796c8dcSSimon Schubert {
51565796c8dcSSimon Schubert if (range > 0) /* Searching forwards. */
51575796c8dcSSimon Schubert {
51585796c8dcSSimon Schubert register const char *d;
51595796c8dcSSimon Schubert register int lim = 0;
51605796c8dcSSimon Schubert int irange = range;
51615796c8dcSSimon Schubert
51625796c8dcSSimon Schubert if (startpos < size1 && startpos + range >= size1)
51635796c8dcSSimon Schubert lim = range - (size1 - startpos);
51645796c8dcSSimon Schubert
51655796c8dcSSimon Schubert d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
51665796c8dcSSimon Schubert
51675796c8dcSSimon Schubert /* Written out as an if-else to avoid testing `translate'
51685796c8dcSSimon Schubert inside the loop. */
51695796c8dcSSimon Schubert if (translate)
51705796c8dcSSimon Schubert while (range > lim
51715796c8dcSSimon Schubert && !fastmap[(unsigned char)
51725796c8dcSSimon Schubert translate[(unsigned char) *d++]])
51735796c8dcSSimon Schubert range--;
51745796c8dcSSimon Schubert else
51755796c8dcSSimon Schubert while (range > lim && !fastmap[(unsigned char) *d++])
51765796c8dcSSimon Schubert range--;
51775796c8dcSSimon Schubert
51785796c8dcSSimon Schubert startpos += irange - range;
51795796c8dcSSimon Schubert }
51805796c8dcSSimon Schubert else /* Searching backwards. */
51815796c8dcSSimon Schubert {
51825796c8dcSSimon Schubert register CHAR_T c = (size1 == 0 || startpos >= size1
51835796c8dcSSimon Schubert ? string2[startpos - size1]
51845796c8dcSSimon Schubert : string1[startpos]);
51855796c8dcSSimon Schubert
51865796c8dcSSimon Schubert if (!fastmap[(unsigned char) TRANSLATE (c)])
51875796c8dcSSimon Schubert goto advance;
51885796c8dcSSimon Schubert }
51895796c8dcSSimon Schubert }
51905796c8dcSSimon Schubert
51915796c8dcSSimon Schubert /* If can't match the null string, and that's all we have left, fail. */
51925796c8dcSSimon Schubert if (range >= 0 && startpos == total_size && fastmap
51935796c8dcSSimon Schubert && !bufp->can_be_null)
51945796c8dcSSimon Schubert {
51955796c8dcSSimon Schubert #ifdef WCHAR
51965796c8dcSSimon Schubert FREE_WCS_BUFFERS ();
51975796c8dcSSimon Schubert #endif
51985796c8dcSSimon Schubert return -1;
51995796c8dcSSimon Schubert }
52005796c8dcSSimon Schubert
52015796c8dcSSimon Schubert #ifdef WCHAR
52025796c8dcSSimon Schubert val = wcs_re_match_2_internal (bufp, string1, size1, string2,
52035796c8dcSSimon Schubert size2, startpos, regs, stop,
52045796c8dcSSimon Schubert wcs_string1, wcs_size1,
52055796c8dcSSimon Schubert wcs_string2, wcs_size2,
52065796c8dcSSimon Schubert mbs_offset1, mbs_offset2);
52075796c8dcSSimon Schubert #else /* BYTE */
52085796c8dcSSimon Schubert val = byte_re_match_2_internal (bufp, string1, size1, string2,
52095796c8dcSSimon Schubert size2, startpos, regs, stop);
52105796c8dcSSimon Schubert #endif /* BYTE */
52115796c8dcSSimon Schubert
52125796c8dcSSimon Schubert #ifndef REGEX_MALLOC
52135796c8dcSSimon Schubert # ifdef C_ALLOCA
52145796c8dcSSimon Schubert alloca (0);
52155796c8dcSSimon Schubert # endif
52165796c8dcSSimon Schubert #endif
52175796c8dcSSimon Schubert
52185796c8dcSSimon Schubert if (val >= 0)
52195796c8dcSSimon Schubert {
52205796c8dcSSimon Schubert #ifdef WCHAR
52215796c8dcSSimon Schubert FREE_WCS_BUFFERS ();
52225796c8dcSSimon Schubert #endif
52235796c8dcSSimon Schubert return startpos;
52245796c8dcSSimon Schubert }
52255796c8dcSSimon Schubert
52265796c8dcSSimon Schubert if (val == -2)
52275796c8dcSSimon Schubert {
52285796c8dcSSimon Schubert #ifdef WCHAR
52295796c8dcSSimon Schubert FREE_WCS_BUFFERS ();
52305796c8dcSSimon Schubert #endif
52315796c8dcSSimon Schubert return -2;
52325796c8dcSSimon Schubert }
52335796c8dcSSimon Schubert
52345796c8dcSSimon Schubert advance:
52355796c8dcSSimon Schubert if (!range)
52365796c8dcSSimon Schubert break;
52375796c8dcSSimon Schubert else if (range > 0)
52385796c8dcSSimon Schubert {
52395796c8dcSSimon Schubert range--;
52405796c8dcSSimon Schubert startpos++;
52415796c8dcSSimon Schubert }
52425796c8dcSSimon Schubert else
52435796c8dcSSimon Schubert {
52445796c8dcSSimon Schubert range++;
52455796c8dcSSimon Schubert startpos--;
52465796c8dcSSimon Schubert }
52475796c8dcSSimon Schubert }
52485796c8dcSSimon Schubert #ifdef WCHAR
52495796c8dcSSimon Schubert FREE_WCS_BUFFERS ();
52505796c8dcSSimon Schubert #endif
52515796c8dcSSimon Schubert return -1;
52525796c8dcSSimon Schubert }
52535796c8dcSSimon Schubert
#ifdef WCHAR
/* Convert PTR, a pointer into one of the wchar_t search strings
   `string1' and `string2', into a multibyte string offset.  The
   mbs_offset tables supply the translation (see convert_mbs_to_wcs); a
   missing table counts as offset 0.  Offsets into string2 have csize1
   added.  */
# define POINTER_TO_OFFSET(ptr)						\
  ((regoff_t)								\
   (FIRST_STRING_P (ptr)						\
    ? (mbs_offset1 != NULL ? mbs_offset1[(ptr) - string1] : 0)		\
    : ((mbs_offset2 != NULL ? mbs_offset2[(ptr) - string2] : 0)	\
       + csize1)))
#else /* BYTE */
/* Convert PTR, a pointer into one of the search strings `string1' and
   `string2', into an offset: the distance from the start of string1,
   or the distance from the start of string2 plus size1.  */
# define POINTER_TO_OFFSET(ptr)						\
  ((regoff_t)								\
   (FIRST_STRING_P (ptr)						\
    ? ((ptr) - string1)							\
    : ((ptr) - string2 + size1)))
#endif /* WCHAR */
52725796c8dcSSimon Schubert
/* Helpers for walking the two-part subject string in re_match_2.  They
   rely on the local variables of the matcher: d, dend, string1,
   string2, size1, size2, end2, end_match_1 and end_match_2, and on the
   `fail' label.  */

/* True while the current segment limit is that of string1.  */
#define MATCHING_IN_FIRST_STRING  (end_match_1 == dend)

/* Use before reading a character through *d.  While D sits at the end
   of the current segment, either fail (the segment was string2) or
   continue at the start of string2.  */
#define PREFETCH()							\
  while (dend == d)							\
    {									\
      /* Nothing left after string2: the match attempt fails.  */	\
      if (end_match_2 == dend)						\
        goto fail;							\
      /* string1 is exhausted: carry on in string2.  */		\
      dend = end_match_2;						\
      d = string2;							\
    }

/* Test whether D is at the very beginning or the very end of the
   virtual concatenation of `string1' and `string2'.  When there is
   only one string, it is `string2'.  */
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d) (end2 == (d))
52945796c8dcSSimon Schubert
52955796c8dcSSimon Schubert
/* Test whether D points to a word-constituent character.  Two boundary
   positions are redirected before the character is read: a D equal to
   end1 (just past string1) reads the first character of string2, and a
   D equal to string2 - 1 reads the last character of string1.  D is
   evaluated several times.  */
#ifdef WCHAR
/* The wide-character build uses iswalnum plus an explicit check for
   L'_' instead of the SYNTAX table.  */
# define WORDCHAR_P(d)							\
  (0 != iswalnum ((wint_t) ((d) == end1					\
                            ? *string2					\
                            : ((d) == string2 - 1			\
                               ? *(end1 - 1)				\
                               : *(d))))				\
   || L'_' == ((d) == end1						\
               ? *string2						\
               : ((d) == string2 - 1					\
                  ? *(end1 - 1)						\
                  : *(d))))
#else /* BYTE */
# define WORDCHAR_P(d)							\
  (Sword == SYNTAX ((d) == end1						\
                    ? *string2						\
                    : ((d) == string2 - 1				\
                       ? *(end1 - 1)					\
                       : *(d))))
#endif /* WCHAR */
53135796c8dcSSimon Schubert
53145796c8dcSSimon Schubert /* Disabled due to a compiler bug -- see comment at case wordbound */
53155796c8dcSSimon Schubert #if 0
53165796c8dcSSimon Schubert /* Test if the character before D and the one at D differ with respect
53175796c8dcSSimon Schubert to being word-constituent. */
53185796c8dcSSimon Schubert #define AT_WORD_BOUNDARY(d) \
53195796c8dcSSimon Schubert (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
53205796c8dcSSimon Schubert || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
53215796c8dcSSimon Schubert #endif
53225796c8dcSSimon Schubert
/* FREE_VARIABLES releases the working storage of the matcher.  What it
   expands to depends on two build switches:

     MATCH_MAY_ALLOCATE  - the failure stack and the register vectors
                           are released;
     WCHAR               - the wide string copies and their offset
                           tables are released too, unless
                           cant_free_wcs_buf is set.

   With neither switch there is nothing to release.  */
#ifdef WCHAR
# ifdef MATCH_MAY_ALLOCATE
#  define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
    if (!cant_free_wcs_buf)						\
      {									\
        FREE_VAR (string1);						\
        FREE_VAR (string2);						\
        FREE_VAR (mbs_offset1);						\
        FREE_VAR (mbs_offset2);						\
      }									\
  } while (0)
# else /* not MATCH_MAY_ALLOCATE */
#  define FREE_VARIABLES()						\
  do {									\
    if (!cant_free_wcs_buf)						\
      {									\
        FREE_VAR (string1);						\
        FREE_VAR (string2);						\
        FREE_VAR (mbs_offset1);						\
        FREE_VAR (mbs_offset2);						\
      }									\
  } while (0)
# endif /* not MATCH_MAY_ALLOCATE */
#else /* BYTE */
# ifdef MATCH_MAY_ALLOCATE
#  define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
  } while (0)
# else /* not MATCH_MAY_ALLOCATE */
#  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
# endif /* not MATCH_MAY_ALLOCATE */
#endif /* BYTE */
53775796c8dcSSimon Schubert
/* Sentinels meaning "no register is currently active".

   Constraints on the two values:
     - neither may be a valid register number; a register number occupies
       one byte of the pattern, so at most 255 registers exist and any
       value above 255 is safe;
     - they must differ by exactly 1, because of NUM_FAILURE_ITEMS above;
     - the "lowest" sentinel must be greater than the "highest" one, so
       that no registers are saved when none are active.  */
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
53875796c8dcSSimon Schubert
53885796c8dcSSimon Schubert #else /* not INSIDE_RECURSION */
53895796c8dcSSimon Schubert /* Matching routines. */
53905796c8dcSSimon Schubert
#ifndef emacs /* Emacs never uses this.  */
/* re_match: the single-string form of re_match_2.

   STRING (SIZE bytes) is handed to the internal matcher as the second
   string, with a null, zero-length first string, and SIZE is also used
   as the stop position.  Matching starts at POS.  The internal
   matcher's result is returned unchanged.  */

int
re_match (struct re_pattern_buffer *bufp, const char *string,
          int size, int pos, struct re_registers *regs)
{
  int match_len;

  /* Single-byte locales use the byte matcher; when multibyte support is
     compiled in, every other locale uses the wide-character matcher.  */
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX == 1)
# endif
    match_len = byte_re_match_2_internal (bufp, NULL, 0, string, size,
                                          pos, regs, size);
# ifdef MBS_SUPPORT
  else
    match_len = wcs_re_match_2_internal (bufp, NULL, 0, string, size,
                                         pos, regs, size,
                                         NULL, 0, NULL, 0, NULL, NULL);
# endif

  /* The matcher is a separate function so that an alloca cleanup can be
     forced here once it has returned.  */
# if !defined REGEX_MALLOC && defined C_ALLOCA
  alloca (0);
# endif
  return match_len;
}
# ifdef _LIBC
weak_alias (__re_match, re_match)
# endif
#endif /* not emacs */
54195796c8dcSSimon Schubert
54205796c8dcSSimon Schubert #endif /* not INSIDE_RECURSION */
54215796c8dcSSimon Schubert
54225796c8dcSSimon Schubert #ifdef INSIDE_RECURSION
54235796c8dcSSimon Schubert static boolean PREFIX(group_match_null_string_p) (UCHAR_T **p,
54245796c8dcSSimon Schubert UCHAR_T *end,
54255796c8dcSSimon Schubert PREFIX(register_info_type) *reg_info);
54265796c8dcSSimon Schubert static boolean PREFIX(alt_match_null_string_p) (UCHAR_T *p,
54275796c8dcSSimon Schubert UCHAR_T *end,
54285796c8dcSSimon Schubert PREFIX(register_info_type) *reg_info);
54295796c8dcSSimon Schubert static boolean PREFIX(common_op_match_null_string_p) (UCHAR_T **p,
54305796c8dcSSimon Schubert UCHAR_T *end,
54315796c8dcSSimon Schubert PREFIX(register_info_type) *reg_info);
54325796c8dcSSimon Schubert static int PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2,
54335796c8dcSSimon Schubert int len, char *translate);
54345796c8dcSSimon Schubert #else /* not INSIDE_RECURSION */
54355796c8dcSSimon Schubert
/* re_match_2: match the compiled pattern in BUFP against the (virtual)
   concatenation of STRING1 and STRING2, whose lengths are SIZE1 and
   SIZE2 respectively.  Matching starts at POS and stops at STOP.

   When REGS is non-null, the offsets of the substring matched by each
   group may be stored in REGS, subject to the `no_sub' field of BUFP.
   (NOTE(review): the earlier wording said this happens when `no_sub' is
   "nonzero"; confirm the polarity against the register-filling code in
   the internal matcher.)  See the documentation for exactly how many
   groups are filled.

   Returns the length of the matched substring, -1 if there is no match,
   or -2 on an internal error (such as the failure stack overflowing).  */

int
re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
            const char *string2, int size2, int pos,
            struct re_registers *regs, int stop)
{
  int match_len;

  /* Single-byte locales use the byte matcher; when multibyte support is
     compiled in, every other locale uses the wide-character matcher.  */
#ifdef MBS_SUPPORT
  if (MB_CUR_MAX == 1)
#endif
    match_len = byte_re_match_2_internal (bufp, string1, size1,
                                          string2, size2,
                                          pos, regs, stop);
#ifdef MBS_SUPPORT
  else
    match_len = wcs_re_match_2_internal (bufp, string1, size1,
                                         string2, size2,
                                         pos, regs, stop,
                                         NULL, 0, NULL, 0, NULL, NULL);
#endif

  /* The matcher is a separate function so that an alloca cleanup can be
     forced here once it has returned.  */
#if !defined REGEX_MALLOC && defined C_ALLOCA
  alloca (0);
#endif
  return match_len;
}
#ifdef _LIBC
weak_alias (__re_match_2, re_match_2)
#endif
54755796c8dcSSimon Schubert
54765796c8dcSSimon Schubert #endif /* not INSIDE_RECURSION */
54775796c8dcSSimon Schubert
54785796c8dcSSimon Schubert #ifdef INSIDE_RECURSION
54795796c8dcSSimon Schubert
54805796c8dcSSimon Schubert #ifdef WCHAR
static int count_mbs_length (int *, int);

/* Convert a byte length within a multibyte string into the number of
   wchar_t characters that the first LENGTH bytes occupy.

   OFFSET_BUFFER is the offset table that goes with the string (see
   convert_mbs_to_wcs); entry I is compared against byte positions, and
   for every I, OFFSET_BUFFER[I] >= I.  The search below relies on the
   entries being in ascending order.

   Returns:
     -1      if LENGTH is negative, or if no entry of the table equals
             LENGTH (i.e. LENGTH does not land on a recorded offset);
      0      if OFFSET_BUFFER is NULL;
      index  I such that OFFSET_BUFFER[I] == LENGTH otherwise.  */

static int
count_mbs_length (int *offset_buffer, int length)
{
  int lower, upper;

  /* A negative size is invalid.  */
  if (length < 0)
    return -1;

  if (offset_buffer == NULL)
    return 0;

  /* With no multibyte characters, OFFSET_BUFFER[I] == I for all I.
     Optimize for this case.  */
  if (offset_buffer[length] == length)
    return length;

  /* LENGTH bounds the answer from above because OFFSET_BUFFER[I] >= I.
     Both end points are already ruled out, so only the indices strictly
     between LOWER and UPPER are examined.  */
  lower = 0;
  upper = length;

  while (upper - lower > 1)
    {
      /* Written as lower + half-the-gap rather than (lower + upper) / 2
         so the sum cannot overflow `int' when LENGTH is very large.  */
      const int middle = lower + (upper - lower) / 2;

      if (offset_buffer[middle] > length)
        upper = middle;
      else if (offset_buffer[middle] < length)
        lower = middle;
      else
        return middle;
    }

  /* No entry matches LENGTH.  */
  return -1;
}
55245796c8dcSSimon Schubert #endif /* WCHAR */
55255796c8dcSSimon Schubert
55265796c8dcSSimon Schubert /* This is a separate function so that we can force an alloca cleanup
55275796c8dcSSimon Schubert afterwards. */
55285796c8dcSSimon Schubert #ifdef WCHAR
55295796c8dcSSimon Schubert static int
wcs_re_match_2_internal(struct re_pattern_buffer * bufp,const char * cstring1,int csize1,const char * cstring2,int csize2,int pos,struct re_registers * regs,int stop,wchar_t * string1,int size1,wchar_t * string2,int size2,int * mbs_offset1,int * mbs_offset2)55305796c8dcSSimon Schubert wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
55315796c8dcSSimon Schubert const char *cstring1, int csize1,
55325796c8dcSSimon Schubert const char *cstring2, int csize2,
55335796c8dcSSimon Schubert int pos,
55345796c8dcSSimon Schubert struct re_registers *regs,
55355796c8dcSSimon Schubert int stop,
			 /* string1 == string2 == NULL means that string1/2, size1/2
			    and mbs_offset1/2 need setting up in this function.  */
			 /* We need wchar_t * buffers corresponding to cstring1 and
			    cstring2.  */
55395796c8dcSSimon Schubert wchar_t *string1, int size1,
55405796c8dcSSimon Schubert wchar_t *string2, int size2,
			 /* Offset buffers for optimization.  See convert_mbs_to_wcs.  */
55425796c8dcSSimon Schubert int *mbs_offset1, int *mbs_offset2)
55435796c8dcSSimon Schubert #else /* BYTE */
55445796c8dcSSimon Schubert static int
55455796c8dcSSimon Schubert byte_re_match_2_internal (struct re_pattern_buffer *bufp,
55465796c8dcSSimon Schubert const char *string1, int size1,
55475796c8dcSSimon Schubert const char *string2, int size2,
55485796c8dcSSimon Schubert int pos,
55495796c8dcSSimon Schubert struct re_registers *regs, int stop)
55505796c8dcSSimon Schubert #endif /* BYTE */
55515796c8dcSSimon Schubert {
55525796c8dcSSimon Schubert /* General temporaries. */
55535796c8dcSSimon Schubert int mcnt;
55545796c8dcSSimon Schubert UCHAR_T *p1;
55555796c8dcSSimon Schubert #ifdef WCHAR
55565796c8dcSSimon Schubert /* They hold whether each wchar_t is binary data or not. */
55575796c8dcSSimon Schubert char *is_binary = NULL;
55585796c8dcSSimon Schubert /* If true, we can't free string1/2, mbs_offset1/2. */
55595796c8dcSSimon Schubert int cant_free_wcs_buf = 1;
55605796c8dcSSimon Schubert #endif /* WCHAR */
55615796c8dcSSimon Schubert
55625796c8dcSSimon Schubert /* Just past the end of the corresponding string. */
55635796c8dcSSimon Schubert const CHAR_T *end1, *end2;
55645796c8dcSSimon Schubert
55655796c8dcSSimon Schubert /* Pointers into string1 and string2, just past the last characters in
55665796c8dcSSimon Schubert each to consider matching. */
55675796c8dcSSimon Schubert const CHAR_T *end_match_1, *end_match_2;
55685796c8dcSSimon Schubert
55695796c8dcSSimon Schubert /* Where we are in the data, and the end of the current string. */
55705796c8dcSSimon Schubert const CHAR_T *d, *dend;
55715796c8dcSSimon Schubert
55725796c8dcSSimon Schubert /* Where we are in the pattern, and the end of the pattern. */
55735796c8dcSSimon Schubert #ifdef WCHAR
55745796c8dcSSimon Schubert UCHAR_T *pattern, *p;
55755796c8dcSSimon Schubert register UCHAR_T *pend;
55765796c8dcSSimon Schubert #else /* BYTE */
55775796c8dcSSimon Schubert UCHAR_T *p = bufp->buffer;
55785796c8dcSSimon Schubert register UCHAR_T *pend = p + bufp->used;
55795796c8dcSSimon Schubert #endif /* WCHAR */
55805796c8dcSSimon Schubert
55815796c8dcSSimon Schubert /* Mark the opcode just after a start_memory, so we can test for an
55825796c8dcSSimon Schubert empty subpattern when we get to the stop_memory. */
55835796c8dcSSimon Schubert UCHAR_T *just_past_start_mem = 0;
55845796c8dcSSimon Schubert
55855796c8dcSSimon Schubert /* We use this to map every character in the string. */
55865796c8dcSSimon Schubert RE_TRANSLATE_TYPE translate = bufp->translate;
55875796c8dcSSimon Schubert
55885796c8dcSSimon Schubert /* Failure point stack. Each place that can handle a failure further
55895796c8dcSSimon Schubert down the line pushes a failure point on this stack. It consists of
55905796c8dcSSimon Schubert restart, regend, and reg_info for all registers corresponding to
55915796c8dcSSimon Schubert the subexpressions we're currently inside, plus the number of such
55925796c8dcSSimon Schubert registers, and, finally, two char *'s. The first char * is where
55935796c8dcSSimon Schubert to resume scanning the pattern; the second one is where to resume
55945796c8dcSSimon Schubert scanning the strings. If the latter is zero, the failure point is
55955796c8dcSSimon Schubert a ``dummy''; if a failure happens and the failure point is a dummy,
     it gets discarded and the next one is tried.  */
55975796c8dcSSimon Schubert #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
55985796c8dcSSimon Schubert PREFIX(fail_stack_type) fail_stack;
55995796c8dcSSimon Schubert #endif
56005796c8dcSSimon Schubert #ifdef DEBUG
56015796c8dcSSimon Schubert static unsigned failure_id;
56025796c8dcSSimon Schubert unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
56035796c8dcSSimon Schubert #endif
56045796c8dcSSimon Schubert
56055796c8dcSSimon Schubert #ifdef REL_ALLOC
56065796c8dcSSimon Schubert /* This holds the pointer to the failure stack, when
56075796c8dcSSimon Schubert it is allocated relocatably. */
56085796c8dcSSimon Schubert fail_stack_elt_t *failure_stack_ptr;
56095796c8dcSSimon Schubert #endif
56105796c8dcSSimon Schubert
56115796c8dcSSimon Schubert /* We fill all the registers internally, independent of what we
56125796c8dcSSimon Schubert return, for use in backreferences. The number here includes
56135796c8dcSSimon Schubert an element for register zero. */
56145796c8dcSSimon Schubert size_t num_regs = bufp->re_nsub + 1;
56155796c8dcSSimon Schubert
56165796c8dcSSimon Schubert /* The currently active registers. */
56175796c8dcSSimon Schubert active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
56185796c8dcSSimon Schubert active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
56195796c8dcSSimon Schubert
56205796c8dcSSimon Schubert /* Information on the contents of registers. These are pointers into
56215796c8dcSSimon Schubert the input strings; they record just what was matched (on this
56225796c8dcSSimon Schubert attempt) by a subexpression part of the pattern, that is, the
     regnum-th regstart pointer points to where in the input we began
56245796c8dcSSimon Schubert matching and the regnum-th regend points to right after where we
56255796c8dcSSimon Schubert stopped matching the regnum-th subexpression. (The zeroth register
56265796c8dcSSimon Schubert keeps track of what the whole pattern matches.) */
56275796c8dcSSimon Schubert #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
56285796c8dcSSimon Schubert const CHAR_T **regstart, **regend;
56295796c8dcSSimon Schubert #endif
56305796c8dcSSimon Schubert
56315796c8dcSSimon Schubert /* If a group that's operated upon by a repetition operator fails to
56325796c8dcSSimon Schubert match anything, then the register for its start will need to be
56335796c8dcSSimon Schubert restored because it will have been set to wherever in the string we
56345796c8dcSSimon Schubert are when we last see its open-group operator. Similarly for a
56355796c8dcSSimon Schubert register's end. */
56365796c8dcSSimon Schubert #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
56375796c8dcSSimon Schubert const CHAR_T **old_regstart, **old_regend;
56385796c8dcSSimon Schubert #endif
56395796c8dcSSimon Schubert
56405796c8dcSSimon Schubert /* The is_active field of reg_info helps us keep track of which (possibly
56415796c8dcSSimon Schubert nested) subexpressions we are currently in. The matched_something
56425796c8dcSSimon Schubert field of reg_info[reg_num] helps us tell whether or not we have
56435796c8dcSSimon Schubert matched any of the pattern so far this time through the reg_num-th
56445796c8dcSSimon Schubert subexpression. These two fields get reset each time through any
56455796c8dcSSimon Schubert loop their register is in. */
56465796c8dcSSimon Schubert #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
56475796c8dcSSimon Schubert PREFIX(register_info_type) *reg_info;
56485796c8dcSSimon Schubert #endif
56495796c8dcSSimon Schubert
56505796c8dcSSimon Schubert /* The following record the register info as found in the above
56515796c8dcSSimon Schubert variables when we find a match better than any we've seen before.
56525796c8dcSSimon Schubert This happens as we backtrack through the failure points, which in
56535796c8dcSSimon Schubert turn happens only if we have not yet matched the entire string. */
56545796c8dcSSimon Schubert unsigned best_regs_set = false;
56555796c8dcSSimon Schubert #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
56565796c8dcSSimon Schubert const CHAR_T **best_regstart, **best_regend;
56575796c8dcSSimon Schubert #endif
56585796c8dcSSimon Schubert
56595796c8dcSSimon Schubert /* Logically, this is `best_regend[0]'. But we don't want to have to
56605796c8dcSSimon Schubert allocate space for that if we're not allocating space for anything
56615796c8dcSSimon Schubert else (see below). Also, we never need info about register 0 for
56625796c8dcSSimon Schubert any of the other register vectors, and it seems rather a kludge to
56635796c8dcSSimon Schubert treat `best_regend' differently than the rest. So we keep track of
56645796c8dcSSimon Schubert the end of the best match so far in a separate variable. We
56655796c8dcSSimon Schubert initialize this to NULL so that when we backtrack the first time
56665796c8dcSSimon Schubert and need to test it, it's not garbage. */
56675796c8dcSSimon Schubert const CHAR_T *match_end = NULL;
56685796c8dcSSimon Schubert
56695796c8dcSSimon Schubert /* This helps SET_REGS_MATCHED avoid doing redundant work. */
56705796c8dcSSimon Schubert int set_regs_matched_done = 0;
56715796c8dcSSimon Schubert
56725796c8dcSSimon Schubert /* Used when we pop values we don't care about. */
56735796c8dcSSimon Schubert #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
56745796c8dcSSimon Schubert const CHAR_T **reg_dummy;
56755796c8dcSSimon Schubert PREFIX(register_info_type) *reg_info_dummy;
56765796c8dcSSimon Schubert #endif
56775796c8dcSSimon Schubert
56785796c8dcSSimon Schubert #ifdef DEBUG
56795796c8dcSSimon Schubert /* Counts the total number of registers pushed. */
56805796c8dcSSimon Schubert unsigned num_regs_pushed = 0;
56815796c8dcSSimon Schubert #endif
56825796c8dcSSimon Schubert
56835796c8dcSSimon Schubert DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
56845796c8dcSSimon Schubert
56855796c8dcSSimon Schubert INIT_FAIL_STACK ();
56865796c8dcSSimon Schubert
56875796c8dcSSimon Schubert #ifdef MATCH_MAY_ALLOCATE
56885796c8dcSSimon Schubert /* Do not bother to initialize all the register variables if there are
56895796c8dcSSimon Schubert no groups in the pattern, as it takes a fair amount of time. If
56905796c8dcSSimon Schubert there are groups, we include space for register 0 (the whole
56915796c8dcSSimon Schubert pattern), even though we never use it, since it simplifies the
56925796c8dcSSimon Schubert array indexing. We should fix this. */
56935796c8dcSSimon Schubert if (bufp->re_nsub)
56945796c8dcSSimon Schubert {
56955796c8dcSSimon Schubert regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
56965796c8dcSSimon Schubert regend = REGEX_TALLOC (num_regs, const CHAR_T *);
56975796c8dcSSimon Schubert old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
56985796c8dcSSimon Schubert old_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
56995796c8dcSSimon Schubert best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
57005796c8dcSSimon Schubert best_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
57015796c8dcSSimon Schubert reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
57025796c8dcSSimon Schubert reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *);
57035796c8dcSSimon Schubert reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
57045796c8dcSSimon Schubert
57055796c8dcSSimon Schubert if (!(regstart && regend && old_regstart && old_regend && reg_info
57065796c8dcSSimon Schubert && best_regstart && best_regend && reg_dummy && reg_info_dummy))
57075796c8dcSSimon Schubert {
57085796c8dcSSimon Schubert FREE_VARIABLES ();
57095796c8dcSSimon Schubert return -2;
57105796c8dcSSimon Schubert }
57115796c8dcSSimon Schubert }
57125796c8dcSSimon Schubert else
57135796c8dcSSimon Schubert {
57145796c8dcSSimon Schubert /* We must initialize all our variables to NULL, so that
57155796c8dcSSimon Schubert `FREE_VARIABLES' doesn't try to free them. */
57165796c8dcSSimon Schubert regstart = regend = old_regstart = old_regend = best_regstart
57175796c8dcSSimon Schubert = best_regend = reg_dummy = NULL;
57185796c8dcSSimon Schubert reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL;
57195796c8dcSSimon Schubert }
57205796c8dcSSimon Schubert #endif /* MATCH_MAY_ALLOCATE */
57215796c8dcSSimon Schubert
57225796c8dcSSimon Schubert /* The starting position is bogus. */
57235796c8dcSSimon Schubert #ifdef WCHAR
57245796c8dcSSimon Schubert if (pos < 0 || pos > csize1 + csize2)
57255796c8dcSSimon Schubert #else /* BYTE */
57265796c8dcSSimon Schubert if (pos < 0 || pos > size1 + size2)
57275796c8dcSSimon Schubert #endif
57285796c8dcSSimon Schubert {
57295796c8dcSSimon Schubert FREE_VARIABLES ();
57305796c8dcSSimon Schubert return -1;
57315796c8dcSSimon Schubert }
57325796c8dcSSimon Schubert
57335796c8dcSSimon Schubert #ifdef WCHAR
57345796c8dcSSimon Schubert /* Allocate wchar_t array for string1 and string2 and
57355796c8dcSSimon Schubert fill them with converted string. */
57365796c8dcSSimon Schubert if (string1 == NULL && string2 == NULL)
57375796c8dcSSimon Schubert {
      /* We need to set up the buffers here.  */
57395796c8dcSSimon Schubert
57405796c8dcSSimon Schubert /* We must free wcs buffers in this function. */
57415796c8dcSSimon Schubert cant_free_wcs_buf = 0;
57425796c8dcSSimon Schubert
57435796c8dcSSimon Schubert if (csize1 != 0)
57445796c8dcSSimon Schubert {
57455796c8dcSSimon Schubert string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
57465796c8dcSSimon Schubert mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
57475796c8dcSSimon Schubert is_binary = REGEX_TALLOC (csize1 + 1, char);
57485796c8dcSSimon Schubert if (!string1 || !mbs_offset1 || !is_binary)
57495796c8dcSSimon Schubert {
57505796c8dcSSimon Schubert FREE_VAR (string1);
57515796c8dcSSimon Schubert FREE_VAR (mbs_offset1);
57525796c8dcSSimon Schubert FREE_VAR (is_binary);
57535796c8dcSSimon Schubert return -2;
57545796c8dcSSimon Schubert }
57555796c8dcSSimon Schubert }
57565796c8dcSSimon Schubert if (csize2 != 0)
57575796c8dcSSimon Schubert {
57585796c8dcSSimon Schubert string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
57595796c8dcSSimon Schubert mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
57605796c8dcSSimon Schubert is_binary = REGEX_TALLOC (csize2 + 1, char);
57615796c8dcSSimon Schubert if (!string2 || !mbs_offset2 || !is_binary)
57625796c8dcSSimon Schubert {
57635796c8dcSSimon Schubert FREE_VAR (string1);
57645796c8dcSSimon Schubert FREE_VAR (mbs_offset1);
57655796c8dcSSimon Schubert FREE_VAR (string2);
57665796c8dcSSimon Schubert FREE_VAR (mbs_offset2);
57675796c8dcSSimon Schubert FREE_VAR (is_binary);
57685796c8dcSSimon Schubert return -2;
57695796c8dcSSimon Schubert }
57705796c8dcSSimon Schubert size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
57715796c8dcSSimon Schubert mbs_offset2, is_binary);
57725796c8dcSSimon Schubert string2[size2] = L'\0'; /* for a sentinel */
57735796c8dcSSimon Schubert FREE_VAR (is_binary);
57745796c8dcSSimon Schubert }
57755796c8dcSSimon Schubert }
57765796c8dcSSimon Schubert
57775796c8dcSSimon Schubert /* We need to cast pattern to (wchar_t*), because we casted this compiled
57785796c8dcSSimon Schubert pattern to (char*) in regex_compile. */
57795796c8dcSSimon Schubert p = pattern = (CHAR_T*)bufp->buffer;
57805796c8dcSSimon Schubert pend = (CHAR_T*)(bufp->buffer + bufp->used);
57815796c8dcSSimon Schubert
57825796c8dcSSimon Schubert #endif /* WCHAR */
57835796c8dcSSimon Schubert
57845796c8dcSSimon Schubert /* Initialize subexpression text positions to -1 to mark ones that no
57855796c8dcSSimon Schubert start_memory/stop_memory has been seen for. Also initialize the
57865796c8dcSSimon Schubert register information struct. */
57875796c8dcSSimon Schubert for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
57885796c8dcSSimon Schubert {
57895796c8dcSSimon Schubert regstart[mcnt] = regend[mcnt]
57905796c8dcSSimon Schubert = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
57915796c8dcSSimon Schubert
57925796c8dcSSimon Schubert REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
57935796c8dcSSimon Schubert IS_ACTIVE (reg_info[mcnt]) = 0;
57945796c8dcSSimon Schubert MATCHED_SOMETHING (reg_info[mcnt]) = 0;
57955796c8dcSSimon Schubert EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
57965796c8dcSSimon Schubert }
57975796c8dcSSimon Schubert
57985796c8dcSSimon Schubert /* We move `string1' into `string2' if the latter's empty -- but not if
57995796c8dcSSimon Schubert `string1' is null. */
58005796c8dcSSimon Schubert if (size2 == 0 && string1 != NULL)
58015796c8dcSSimon Schubert {
58025796c8dcSSimon Schubert string2 = string1;
58035796c8dcSSimon Schubert size2 = size1;
58045796c8dcSSimon Schubert string1 = 0;
58055796c8dcSSimon Schubert size1 = 0;
58065796c8dcSSimon Schubert #ifdef WCHAR
58075796c8dcSSimon Schubert mbs_offset2 = mbs_offset1;
58085796c8dcSSimon Schubert csize2 = csize1;
58095796c8dcSSimon Schubert mbs_offset1 = NULL;
58105796c8dcSSimon Schubert csize1 = 0;
58115796c8dcSSimon Schubert #endif
58125796c8dcSSimon Schubert }
58135796c8dcSSimon Schubert end1 = string1 + size1;
58145796c8dcSSimon Schubert end2 = string2 + size2;
58155796c8dcSSimon Schubert
58165796c8dcSSimon Schubert /* Compute where to stop matching, within the two strings. */
58175796c8dcSSimon Schubert #ifdef WCHAR
58185796c8dcSSimon Schubert if (stop <= csize1)
58195796c8dcSSimon Schubert {
58205796c8dcSSimon Schubert mcnt = count_mbs_length(mbs_offset1, stop);
58215796c8dcSSimon Schubert end_match_1 = string1 + mcnt;
58225796c8dcSSimon Schubert end_match_2 = string2;
58235796c8dcSSimon Schubert }
58245796c8dcSSimon Schubert else
58255796c8dcSSimon Schubert {
58265796c8dcSSimon Schubert if (stop > csize1 + csize2)
58275796c8dcSSimon Schubert stop = csize1 + csize2;
58285796c8dcSSimon Schubert end_match_1 = end1;
58295796c8dcSSimon Schubert mcnt = count_mbs_length(mbs_offset2, stop-csize1);
58305796c8dcSSimon Schubert end_match_2 = string2 + mcnt;
58315796c8dcSSimon Schubert }
58325796c8dcSSimon Schubert if (mcnt < 0)
58335796c8dcSSimon Schubert { /* count_mbs_length return error. */
58345796c8dcSSimon Schubert FREE_VARIABLES ();
58355796c8dcSSimon Schubert return -1;
58365796c8dcSSimon Schubert }
58375796c8dcSSimon Schubert #else
58385796c8dcSSimon Schubert if (stop <= size1)
58395796c8dcSSimon Schubert {
58405796c8dcSSimon Schubert end_match_1 = string1 + stop;
58415796c8dcSSimon Schubert end_match_2 = string2;
58425796c8dcSSimon Schubert }
58435796c8dcSSimon Schubert else
58445796c8dcSSimon Schubert {
58455796c8dcSSimon Schubert end_match_1 = end1;
58465796c8dcSSimon Schubert end_match_2 = string2 + stop - size1;
58475796c8dcSSimon Schubert }
58485796c8dcSSimon Schubert #endif /* WCHAR */
58495796c8dcSSimon Schubert
58505796c8dcSSimon Schubert /* `p' scans through the pattern as `d' scans through the data.
58515796c8dcSSimon Schubert `dend' is the end of the input string that `d' points within. `d'
58525796c8dcSSimon Schubert is advanced into the following input string whenever necessary, but
58535796c8dcSSimon Schubert this happens before fetching; therefore, at the beginning of the
58545796c8dcSSimon Schubert loop, `d' can be pointing at the end of a string, but it cannot
58555796c8dcSSimon Schubert equal `string2'. */
58565796c8dcSSimon Schubert #ifdef WCHAR
58575796c8dcSSimon Schubert if (size1 > 0 && pos <= csize1)
58585796c8dcSSimon Schubert {
58595796c8dcSSimon Schubert mcnt = count_mbs_length(mbs_offset1, pos);
58605796c8dcSSimon Schubert d = string1 + mcnt;
58615796c8dcSSimon Schubert dend = end_match_1;
58625796c8dcSSimon Schubert }
58635796c8dcSSimon Schubert else
58645796c8dcSSimon Schubert {
58655796c8dcSSimon Schubert mcnt = count_mbs_length(mbs_offset2, pos-csize1);
58665796c8dcSSimon Schubert d = string2 + mcnt;
58675796c8dcSSimon Schubert dend = end_match_2;
58685796c8dcSSimon Schubert }
58695796c8dcSSimon Schubert
58705796c8dcSSimon Schubert if (mcnt < 0)
58715796c8dcSSimon Schubert { /* count_mbs_length return error. */
58725796c8dcSSimon Schubert FREE_VARIABLES ();
58735796c8dcSSimon Schubert return -1;
58745796c8dcSSimon Schubert }
58755796c8dcSSimon Schubert #else
58765796c8dcSSimon Schubert if (size1 > 0 && pos <= size1)
58775796c8dcSSimon Schubert {
58785796c8dcSSimon Schubert d = string1 + pos;
58795796c8dcSSimon Schubert dend = end_match_1;
58805796c8dcSSimon Schubert }
58815796c8dcSSimon Schubert else
58825796c8dcSSimon Schubert {
58835796c8dcSSimon Schubert d = string2 + pos - size1;
58845796c8dcSSimon Schubert dend = end_match_2;
58855796c8dcSSimon Schubert }
58865796c8dcSSimon Schubert #endif /* WCHAR */
58875796c8dcSSimon Schubert
58885796c8dcSSimon Schubert DEBUG_PRINT1 ("The compiled pattern is:\n");
58895796c8dcSSimon Schubert DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
58905796c8dcSSimon Schubert DEBUG_PRINT1 ("The string to match is: `");
58915796c8dcSSimon Schubert DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
58925796c8dcSSimon Schubert DEBUG_PRINT1 ("'\n");
58935796c8dcSSimon Schubert
58945796c8dcSSimon Schubert /* This loops over pattern commands. It exits by returning from the
58955796c8dcSSimon Schubert function if the match is complete, or it drops through if the match
58965796c8dcSSimon Schubert fails at this starting point in the input data. */
58975796c8dcSSimon Schubert for (;;)
58985796c8dcSSimon Schubert {
58995796c8dcSSimon Schubert #ifdef _LIBC
59005796c8dcSSimon Schubert DEBUG_PRINT2 ("\n%p: ", p);
59015796c8dcSSimon Schubert #else
59025796c8dcSSimon Schubert DEBUG_PRINT2 ("\n0x%x: ", p);
59035796c8dcSSimon Schubert #endif
59045796c8dcSSimon Schubert
59055796c8dcSSimon Schubert if (p == pend)
59065796c8dcSSimon Schubert { /* End of pattern means we might have succeeded. */
59075796c8dcSSimon Schubert DEBUG_PRINT1 ("end of pattern ... ");
59085796c8dcSSimon Schubert
59095796c8dcSSimon Schubert /* If we haven't matched the entire string, and we want the
59105796c8dcSSimon Schubert longest match, try backtracking. */
59115796c8dcSSimon Schubert if (d != end_match_2)
59125796c8dcSSimon Schubert {
59135796c8dcSSimon Schubert /* 1 if this match ends in the same string (string1 or string2)
59145796c8dcSSimon Schubert as the best previous match. */
5915cf7f2e2dSJohn Marino boolean same_str_p;
5916cf7f2e2dSJohn Marino
59175796c8dcSSimon Schubert /* 1 if this match is the best seen so far. */
59185796c8dcSSimon Schubert boolean best_match_p;
59195796c8dcSSimon Schubert
5920cf7f2e2dSJohn Marino same_str_p = (FIRST_STRING_P (match_end)
5921cf7f2e2dSJohn Marino == MATCHING_IN_FIRST_STRING);
5922cf7f2e2dSJohn Marino
59235796c8dcSSimon Schubert /* AIX compiler got confused when this was combined
59245796c8dcSSimon Schubert with the previous declaration. */
59255796c8dcSSimon Schubert if (same_str_p)
59265796c8dcSSimon Schubert best_match_p = d > match_end;
59275796c8dcSSimon Schubert else
59285796c8dcSSimon Schubert best_match_p = !MATCHING_IN_FIRST_STRING;
59295796c8dcSSimon Schubert
59305796c8dcSSimon Schubert DEBUG_PRINT1 ("backtracking.\n");
59315796c8dcSSimon Schubert
59325796c8dcSSimon Schubert if (!FAIL_STACK_EMPTY ())
59335796c8dcSSimon Schubert { /* More failure points to try. */
59345796c8dcSSimon Schubert
59355796c8dcSSimon Schubert /* If exceeds best match so far, save it. */
59365796c8dcSSimon Schubert if (!best_regs_set || best_match_p)
59375796c8dcSSimon Schubert {
59385796c8dcSSimon Schubert best_regs_set = true;
59395796c8dcSSimon Schubert match_end = d;
59405796c8dcSSimon Schubert
59415796c8dcSSimon Schubert DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
59425796c8dcSSimon Schubert
59435796c8dcSSimon Schubert for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
59445796c8dcSSimon Schubert {
59455796c8dcSSimon Schubert best_regstart[mcnt] = regstart[mcnt];
59465796c8dcSSimon Schubert best_regend[mcnt] = regend[mcnt];
59475796c8dcSSimon Schubert }
59485796c8dcSSimon Schubert }
59495796c8dcSSimon Schubert goto fail;
59505796c8dcSSimon Schubert }
59515796c8dcSSimon Schubert
59525796c8dcSSimon Schubert /* If no failure points, don't restore garbage. And if
59535796c8dcSSimon Schubert last match is real best match, don't restore second
59545796c8dcSSimon Schubert best one. */
59555796c8dcSSimon Schubert else if (best_regs_set && !best_match_p)
59565796c8dcSSimon Schubert {
59575796c8dcSSimon Schubert restore_best_regs:
59585796c8dcSSimon Schubert /* Restore best match. It may happen that `dend ==
59595796c8dcSSimon Schubert end_match_1' while the restored d is in string2.
59605796c8dcSSimon Schubert For example, the pattern `x.*y.*z' against the
59615796c8dcSSimon Schubert strings `x-' and `y-z-', if the two strings are
59625796c8dcSSimon Schubert not consecutive in memory. */
59635796c8dcSSimon Schubert DEBUG_PRINT1 ("Restoring best registers.\n");
59645796c8dcSSimon Schubert
59655796c8dcSSimon Schubert d = match_end;
59665796c8dcSSimon Schubert dend = ((d >= string1 && d <= end1)
59675796c8dcSSimon Schubert ? end_match_1 : end_match_2);
59685796c8dcSSimon Schubert
59695796c8dcSSimon Schubert for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
59705796c8dcSSimon Schubert {
59715796c8dcSSimon Schubert regstart[mcnt] = best_regstart[mcnt];
59725796c8dcSSimon Schubert regend[mcnt] = best_regend[mcnt];
59735796c8dcSSimon Schubert }
59745796c8dcSSimon Schubert }
59755796c8dcSSimon Schubert } /* d != end_match_2 */
59765796c8dcSSimon Schubert
59775796c8dcSSimon Schubert succeed_label:
59785796c8dcSSimon Schubert DEBUG_PRINT1 ("Accepting match.\n");
59795796c8dcSSimon Schubert /* If caller wants register contents data back, do it. */
59805796c8dcSSimon Schubert if (regs && !bufp->no_sub)
59815796c8dcSSimon Schubert {
59825796c8dcSSimon Schubert /* Have the register data arrays been allocated? */
59835796c8dcSSimon Schubert if (bufp->regs_allocated == REGS_UNALLOCATED)
59845796c8dcSSimon Schubert { /* No. So allocate them with malloc. We need one
59855796c8dcSSimon Schubert extra element beyond `num_regs' for the `-1' marker
59865796c8dcSSimon Schubert GNU code uses. */
59875796c8dcSSimon Schubert regs->num_regs = MAX (RE_NREGS, num_regs + 1);
59885796c8dcSSimon Schubert regs->start = TALLOC (regs->num_regs, regoff_t);
59895796c8dcSSimon Schubert regs->end = TALLOC (regs->num_regs, regoff_t);
59905796c8dcSSimon Schubert if (regs->start == NULL || regs->end == NULL)
59915796c8dcSSimon Schubert {
59925796c8dcSSimon Schubert FREE_VARIABLES ();
59935796c8dcSSimon Schubert return -2;
59945796c8dcSSimon Schubert }
59955796c8dcSSimon Schubert bufp->regs_allocated = REGS_REALLOCATE;
59965796c8dcSSimon Schubert }
59975796c8dcSSimon Schubert else if (bufp->regs_allocated == REGS_REALLOCATE)
59985796c8dcSSimon Schubert { /* Yes. If we need more elements than were already
59995796c8dcSSimon Schubert allocated, reallocate them. If we need fewer, just
60005796c8dcSSimon Schubert leave it alone. */
60015796c8dcSSimon Schubert if (regs->num_regs < num_regs + 1)
60025796c8dcSSimon Schubert {
60035796c8dcSSimon Schubert regs->num_regs = num_regs + 1;
60045796c8dcSSimon Schubert RETALLOC (regs->start, regs->num_regs, regoff_t);
60055796c8dcSSimon Schubert RETALLOC (regs->end, regs->num_regs, regoff_t);
60065796c8dcSSimon Schubert if (regs->start == NULL || regs->end == NULL)
60075796c8dcSSimon Schubert {
60085796c8dcSSimon Schubert FREE_VARIABLES ();
60095796c8dcSSimon Schubert return -2;
60105796c8dcSSimon Schubert }
60115796c8dcSSimon Schubert }
60125796c8dcSSimon Schubert }
60135796c8dcSSimon Schubert else
60145796c8dcSSimon Schubert {
60155796c8dcSSimon Schubert /* These braces fend off an "empty body in an else-statement"
60165796c8dcSSimon Schubert warning under GCC when assert expands to nothing. */
60175796c8dcSSimon Schubert assert (bufp->regs_allocated == REGS_FIXED);
60185796c8dcSSimon Schubert }
60195796c8dcSSimon Schubert
60205796c8dcSSimon Schubert /* Convert the pointer data in `regstart' and `regend' to
60215796c8dcSSimon Schubert indices. Register zero has to be set differently,
60225796c8dcSSimon Schubert since we haven't kept track of any info for it. */
60235796c8dcSSimon Schubert if (regs->num_regs > 0)
60245796c8dcSSimon Schubert {
60255796c8dcSSimon Schubert regs->start[0] = pos;
60265796c8dcSSimon Schubert #ifdef WCHAR
60275796c8dcSSimon Schubert if (MATCHING_IN_FIRST_STRING)
60285796c8dcSSimon Schubert regs->end[0] = mbs_offset1 != NULL ?
60295796c8dcSSimon Schubert mbs_offset1[d-string1] : 0;
60305796c8dcSSimon Schubert else
60315796c8dcSSimon Schubert regs->end[0] = csize1 + (mbs_offset2 != NULL ?
60325796c8dcSSimon Schubert mbs_offset2[d-string2] : 0);
60335796c8dcSSimon Schubert #else
60345796c8dcSSimon Schubert regs->end[0] = (MATCHING_IN_FIRST_STRING
60355796c8dcSSimon Schubert ? ((regoff_t) (d - string1))
60365796c8dcSSimon Schubert : ((regoff_t) (d - string2 + size1)));
60375796c8dcSSimon Schubert #endif /* WCHAR */
60385796c8dcSSimon Schubert }
60395796c8dcSSimon Schubert
60405796c8dcSSimon Schubert /* Go through the first `min (num_regs, regs->num_regs)'
60415796c8dcSSimon Schubert registers, since that is all we initialized. */
60425796c8dcSSimon Schubert for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
60435796c8dcSSimon Schubert mcnt++)
60445796c8dcSSimon Schubert {
60455796c8dcSSimon Schubert if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
60465796c8dcSSimon Schubert regs->start[mcnt] = regs->end[mcnt] = -1;
60475796c8dcSSimon Schubert else
60485796c8dcSSimon Schubert {
60495796c8dcSSimon Schubert regs->start[mcnt]
60505796c8dcSSimon Schubert = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
60515796c8dcSSimon Schubert regs->end[mcnt]
60525796c8dcSSimon Schubert = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
60535796c8dcSSimon Schubert }
60545796c8dcSSimon Schubert }
60555796c8dcSSimon Schubert
60565796c8dcSSimon Schubert /* If the regs structure we return has more elements than
60575796c8dcSSimon Schubert were in the pattern, set the extra elements to -1. If
60585796c8dcSSimon Schubert we (re)allocated the registers, this is the case,
60595796c8dcSSimon Schubert because we always allocate enough to have at least one
60605796c8dcSSimon Schubert -1 at the end. */
60615796c8dcSSimon Schubert for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
60625796c8dcSSimon Schubert regs->start[mcnt] = regs->end[mcnt] = -1;
60635796c8dcSSimon Schubert } /* regs && !bufp->no_sub */
60645796c8dcSSimon Schubert
60655796c8dcSSimon Schubert DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
60665796c8dcSSimon Schubert nfailure_points_pushed, nfailure_points_popped,
60675796c8dcSSimon Schubert nfailure_points_pushed - nfailure_points_popped);
60685796c8dcSSimon Schubert DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
60695796c8dcSSimon Schubert
60705796c8dcSSimon Schubert #ifdef WCHAR
60715796c8dcSSimon Schubert if (MATCHING_IN_FIRST_STRING)
60725796c8dcSSimon Schubert mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
60735796c8dcSSimon Schubert else
60745796c8dcSSimon Schubert mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
60755796c8dcSSimon Schubert csize1;
60765796c8dcSSimon Schubert mcnt -= pos;
60775796c8dcSSimon Schubert #else
60785796c8dcSSimon Schubert mcnt = d - pos - (MATCHING_IN_FIRST_STRING
60795796c8dcSSimon Schubert ? string1
60805796c8dcSSimon Schubert : string2 - size1);
60815796c8dcSSimon Schubert #endif /* WCHAR */
60825796c8dcSSimon Schubert
60835796c8dcSSimon Schubert DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
60845796c8dcSSimon Schubert
60855796c8dcSSimon Schubert FREE_VARIABLES ();
60865796c8dcSSimon Schubert return mcnt;
60875796c8dcSSimon Schubert }
60885796c8dcSSimon Schubert
60895796c8dcSSimon Schubert /* Otherwise match next pattern command. */
60905796c8dcSSimon Schubert switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
60915796c8dcSSimon Schubert {
60925796c8dcSSimon Schubert /* Ignore these. Used to ignore the n of succeed_n's which
60935796c8dcSSimon Schubert currently have n == 0. */
60945796c8dcSSimon Schubert case no_op:
60955796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING no_op.\n");
60965796c8dcSSimon Schubert break;
60975796c8dcSSimon Schubert
60985796c8dcSSimon Schubert case succeed:
60995796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING succeed.\n");
61005796c8dcSSimon Schubert goto succeed_label;
61015796c8dcSSimon Schubert
61025796c8dcSSimon Schubert /* Match the next n pattern characters exactly. The following
61035796c8dcSSimon Schubert byte in the pattern defines n, and the n bytes after that
61045796c8dcSSimon Schubert are the characters to match. */
61055796c8dcSSimon Schubert case exactn:
61065796c8dcSSimon Schubert #ifdef MBS_SUPPORT
61075796c8dcSSimon Schubert case exactn_bin:
61085796c8dcSSimon Schubert #endif
61095796c8dcSSimon Schubert mcnt = *p++;
61105796c8dcSSimon Schubert DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
61115796c8dcSSimon Schubert
61125796c8dcSSimon Schubert /* This is written out as an if-else so we don't waste time
61135796c8dcSSimon Schubert testing `translate' inside the loop. */
61145796c8dcSSimon Schubert if (translate)
61155796c8dcSSimon Schubert {
61165796c8dcSSimon Schubert do
61175796c8dcSSimon Schubert {
61185796c8dcSSimon Schubert PREFETCH ();
61195796c8dcSSimon Schubert #ifdef WCHAR
61205796c8dcSSimon Schubert if (*d <= 0xff)
61215796c8dcSSimon Schubert {
61225796c8dcSSimon Schubert if ((UCHAR_T) translate[(unsigned char) *d++]
61235796c8dcSSimon Schubert != (UCHAR_T) *p++)
61245796c8dcSSimon Schubert goto fail;
61255796c8dcSSimon Schubert }
61265796c8dcSSimon Schubert else
61275796c8dcSSimon Schubert {
61285796c8dcSSimon Schubert if (*d++ != (CHAR_T) *p++)
61295796c8dcSSimon Schubert goto fail;
61305796c8dcSSimon Schubert }
61315796c8dcSSimon Schubert #else
61325796c8dcSSimon Schubert if ((UCHAR_T) translate[(unsigned char) *d++]
61335796c8dcSSimon Schubert != (UCHAR_T) *p++)
61345796c8dcSSimon Schubert goto fail;
61355796c8dcSSimon Schubert #endif /* WCHAR */
61365796c8dcSSimon Schubert }
61375796c8dcSSimon Schubert while (--mcnt);
61385796c8dcSSimon Schubert }
61395796c8dcSSimon Schubert else
61405796c8dcSSimon Schubert {
61415796c8dcSSimon Schubert do
61425796c8dcSSimon Schubert {
61435796c8dcSSimon Schubert PREFETCH ();
61445796c8dcSSimon Schubert if (*d++ != (CHAR_T) *p++) goto fail;
61455796c8dcSSimon Schubert }
61465796c8dcSSimon Schubert while (--mcnt);
61475796c8dcSSimon Schubert }
61485796c8dcSSimon Schubert SET_REGS_MATCHED ();
61495796c8dcSSimon Schubert break;
61505796c8dcSSimon Schubert
61515796c8dcSSimon Schubert
61525796c8dcSSimon Schubert /* Match any character except possibly a newline or a null. */
61535796c8dcSSimon Schubert case anychar:
61545796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING anychar.\n");
61555796c8dcSSimon Schubert
61565796c8dcSSimon Schubert PREFETCH ();
61575796c8dcSSimon Schubert
61585796c8dcSSimon Schubert if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
61595796c8dcSSimon Schubert || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
61605796c8dcSSimon Schubert goto fail;
61615796c8dcSSimon Schubert
61625796c8dcSSimon Schubert SET_REGS_MATCHED ();
61635796c8dcSSimon Schubert DEBUG_PRINT2 (" Matched `%ld'.\n", (long int) *d);
61645796c8dcSSimon Schubert d++;
61655796c8dcSSimon Schubert break;
61665796c8dcSSimon Schubert
61675796c8dcSSimon Schubert
61685796c8dcSSimon Schubert case charset:
61695796c8dcSSimon Schubert case charset_not:
61705796c8dcSSimon Schubert {
61715796c8dcSSimon Schubert register UCHAR_T c;
61725796c8dcSSimon Schubert #ifdef WCHAR
61735796c8dcSSimon Schubert unsigned int i, char_class_length, coll_symbol_length,
61745796c8dcSSimon Schubert equiv_class_length, ranges_length, chars_length, length;
61755796c8dcSSimon Schubert CHAR_T *workp, *workp2, *charset_top;
61765796c8dcSSimon Schubert #define WORK_BUFFER_SIZE 128
61775796c8dcSSimon Schubert CHAR_T str_buf[WORK_BUFFER_SIZE];
61785796c8dcSSimon Schubert # ifdef _LIBC
61795796c8dcSSimon Schubert uint32_t nrules;
61805796c8dcSSimon Schubert # endif /* _LIBC */
61815796c8dcSSimon Schubert #endif /* WCHAR */
61825796c8dcSSimon Schubert boolean negate = (re_opcode_t) *(p - 1) == charset_not;
61835796c8dcSSimon Schubert
61845796c8dcSSimon Schubert DEBUG_PRINT2 ("EXECUTING charset%s.\n", negate ? "_not" : "");
61855796c8dcSSimon Schubert PREFETCH ();
61865796c8dcSSimon Schubert c = TRANSLATE (*d); /* The character to match. */
61875796c8dcSSimon Schubert #ifdef WCHAR
61885796c8dcSSimon Schubert # ifdef _LIBC
61895796c8dcSSimon Schubert nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
61905796c8dcSSimon Schubert # endif /* _LIBC */
61915796c8dcSSimon Schubert charset_top = p - 1;
61925796c8dcSSimon Schubert char_class_length = *p++;
61935796c8dcSSimon Schubert coll_symbol_length = *p++;
61945796c8dcSSimon Schubert equiv_class_length = *p++;
61955796c8dcSSimon Schubert ranges_length = *p++;
61965796c8dcSSimon Schubert chars_length = *p++;
61975796c8dcSSimon Schubert /* p points charset[6], so the address of the next instruction
61985796c8dcSSimon Schubert (charset[l+m+n+2o+k+p']) equals p[l+m+n+2*o+p'],
61995796c8dcSSimon Schubert where l=length of char_classes, m=length of collating_symbol,
62005796c8dcSSimon Schubert n=equivalence_class, o=length of char_range,
62015796c8dcSSimon Schubert p'=length of character. */
62025796c8dcSSimon Schubert workp = p;
62035796c8dcSSimon Schubert /* Update p to indicate the next instruction. */
62045796c8dcSSimon Schubert p += char_class_length + coll_symbol_length+ equiv_class_length +
62055796c8dcSSimon Schubert 2*ranges_length + chars_length;
62065796c8dcSSimon Schubert
62075796c8dcSSimon Schubert /* match with char_class? */
62085796c8dcSSimon Schubert for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
62095796c8dcSSimon Schubert {
62105796c8dcSSimon Schubert wctype_t wctype;
62115796c8dcSSimon Schubert uintptr_t alignedp = ((uintptr_t)workp
62125796c8dcSSimon Schubert + __alignof__(wctype_t) - 1)
62135796c8dcSSimon Schubert & ~(uintptr_t)(__alignof__(wctype_t) - 1);
62145796c8dcSSimon Schubert wctype = *((wctype_t*)alignedp);
62155796c8dcSSimon Schubert workp += CHAR_CLASS_SIZE;
62165796c8dcSSimon Schubert # ifdef _LIBC
62175796c8dcSSimon Schubert if (__iswctype((wint_t)c, wctype))
62185796c8dcSSimon Schubert goto char_set_matched;
62195796c8dcSSimon Schubert # else
62205796c8dcSSimon Schubert if (iswctype((wint_t)c, wctype))
62215796c8dcSSimon Schubert goto char_set_matched;
62225796c8dcSSimon Schubert # endif
62235796c8dcSSimon Schubert }
62245796c8dcSSimon Schubert
62255796c8dcSSimon Schubert /* match with collating_symbol? */
62265796c8dcSSimon Schubert # ifdef _LIBC
62275796c8dcSSimon Schubert if (nrules != 0)
62285796c8dcSSimon Schubert {
62295796c8dcSSimon Schubert const unsigned char *extra = (const unsigned char *)
62305796c8dcSSimon Schubert _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
62315796c8dcSSimon Schubert
62325796c8dcSSimon Schubert for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
62335796c8dcSSimon Schubert workp++)
62345796c8dcSSimon Schubert {
62355796c8dcSSimon Schubert int32_t *wextra;
62365796c8dcSSimon Schubert wextra = (int32_t*)(extra + *workp++);
62375796c8dcSSimon Schubert for (i = 0; i < *wextra; ++i)
62385796c8dcSSimon Schubert if (TRANSLATE(d[i]) != wextra[1 + i])
62395796c8dcSSimon Schubert break;
62405796c8dcSSimon Schubert
62415796c8dcSSimon Schubert if (i == *wextra)
62425796c8dcSSimon Schubert {
62435796c8dcSSimon Schubert /* Update d.  Because d will be incremented again at
62445796c8dcSSimon Schubert char_set_matched:, we advance it by one less here. */
62455796c8dcSSimon Schubert d += i - 1;
62465796c8dcSSimon Schubert goto char_set_matched;
62475796c8dcSSimon Schubert }
62485796c8dcSSimon Schubert }
62495796c8dcSSimon Schubert }
62505796c8dcSSimon Schubert else /* (nrules == 0) */
62515796c8dcSSimon Schubert # endif
62525796c8dcSSimon Schubert /* If we can't look up collation data, we use wcscoll
62535796c8dcSSimon Schubert instead. */
62545796c8dcSSimon Schubert {
62555796c8dcSSimon Schubert for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
62565796c8dcSSimon Schubert {
62575796c8dcSSimon Schubert const CHAR_T *backup_d = d, *backup_dend = dend;
62585796c8dcSSimon Schubert # ifdef _LIBC
62595796c8dcSSimon Schubert length = __wcslen (workp);
62605796c8dcSSimon Schubert # else
62615796c8dcSSimon Schubert length = wcslen (workp);
62625796c8dcSSimon Schubert # endif
62635796c8dcSSimon Schubert
62645796c8dcSSimon Schubert /* If wcscoll(the collating symbol, whole string) > 0,
62655796c8dcSSimon Schubert no substring of the string can ever match the
62665796c8dcSSimon Schubert collating symbol. */
62675796c8dcSSimon Schubert # ifdef _LIBC
62685796c8dcSSimon Schubert if (__wcscoll (workp, d) > 0)
62695796c8dcSSimon Schubert # else
62705796c8dcSSimon Schubert if (wcscoll (workp, d) > 0)
62715796c8dcSSimon Schubert # endif
62725796c8dcSSimon Schubert {
62735796c8dcSSimon Schubert workp += length + 1;
62745796c8dcSSimon Schubert continue;
62755796c8dcSSimon Schubert }
62765796c8dcSSimon Schubert
62775796c8dcSSimon Schubert /* First, we compare the collating symbol with
62785796c8dcSSimon Schubert the first character of the string.
62795796c8dcSSimon Schubert If it doesn't match, we add the next character to
62805796c8dcSSimon Schubert the compare buffer in turn. */
62815796c8dcSSimon Schubert for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
62825796c8dcSSimon Schubert {
62835796c8dcSSimon Schubert int match;
62845796c8dcSSimon Schubert if (d == dend)
62855796c8dcSSimon Schubert {
62865796c8dcSSimon Schubert if (dend == end_match_2)
62875796c8dcSSimon Schubert break;
62885796c8dcSSimon Schubert d = string2;
62895796c8dcSSimon Schubert dend = end_match_2;
62905796c8dcSSimon Schubert }
62915796c8dcSSimon Schubert
62925796c8dcSSimon Schubert /* add next character to the compare buffer. */
62935796c8dcSSimon Schubert str_buf[i] = TRANSLATE(*d);
62945796c8dcSSimon Schubert str_buf[i+1] = '\0';
62955796c8dcSSimon Schubert
62965796c8dcSSimon Schubert # ifdef _LIBC
62975796c8dcSSimon Schubert match = __wcscoll (workp, str_buf);
62985796c8dcSSimon Schubert # else
62995796c8dcSSimon Schubert match = wcscoll (workp, str_buf);
63005796c8dcSSimon Schubert # endif
63015796c8dcSSimon Schubert if (match == 0)
63025796c8dcSSimon Schubert goto char_set_matched;
63035796c8dcSSimon Schubert
63045796c8dcSSimon Schubert if (match < 0)
63055796c8dcSSimon Schubert /* (str_buf > workp) indicates (str_buf + X > workp),
63065796c8dcSSimon Schubert because for all X (str_buf + X > str_buf).
63075796c8dcSSimon Schubert So we don't need to continue this loop. */
63085796c8dcSSimon Schubert break;
63095796c8dcSSimon Schubert
63105796c8dcSSimon Schubert /* Otherwise (str_buf < workp),
63115796c8dcSSimon Schubert (str_buf+next_character) may equal (workp).
63125796c8dcSSimon Schubert So we continue this loop. */
63135796c8dcSSimon Schubert }
63145796c8dcSSimon Schubert /* not matched */
63155796c8dcSSimon Schubert d = backup_d;
63165796c8dcSSimon Schubert dend = backup_dend;
63175796c8dcSSimon Schubert workp += length + 1;
63185796c8dcSSimon Schubert }
63195796c8dcSSimon Schubert }
63205796c8dcSSimon Schubert /* match with equivalence_class? */
63215796c8dcSSimon Schubert # ifdef _LIBC
63225796c8dcSSimon Schubert if (nrules != 0)
63235796c8dcSSimon Schubert {
63245796c8dcSSimon Schubert const CHAR_T *backup_d = d, *backup_dend = dend;
63255796c8dcSSimon Schubert /* Try to match the equivalence class against
63265796c8dcSSimon Schubert those known to the collate implementation. */
63275796c8dcSSimon Schubert const int32_t *table;
63285796c8dcSSimon Schubert const int32_t *weights;
63295796c8dcSSimon Schubert const int32_t *extra;
63305796c8dcSSimon Schubert const int32_t *indirect;
63315796c8dcSSimon Schubert int32_t idx, idx2;
63325796c8dcSSimon Schubert wint_t *cp;
63335796c8dcSSimon Schubert size_t len;
63345796c8dcSSimon Schubert
63355796c8dcSSimon Schubert /* This #include defines a local function! */
63365796c8dcSSimon Schubert # include <locale/weightwc.h>
63375796c8dcSSimon Schubert
63385796c8dcSSimon Schubert table = (const int32_t *)
63395796c8dcSSimon Schubert _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
63405796c8dcSSimon Schubert weights = (const wint_t *)
63415796c8dcSSimon Schubert _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
63425796c8dcSSimon Schubert extra = (const wint_t *)
63435796c8dcSSimon Schubert _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
63445796c8dcSSimon Schubert indirect = (const int32_t *)
63455796c8dcSSimon Schubert _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
63465796c8dcSSimon Schubert
63475796c8dcSSimon Schubert /* Write 1 collating element to str_buf, and
63485796c8dcSSimon Schubert get its index. */
63495796c8dcSSimon Schubert idx2 = 0;
63505796c8dcSSimon Schubert
63515796c8dcSSimon Schubert for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
63525796c8dcSSimon Schubert {
63535796c8dcSSimon Schubert cp = (wint_t*)str_buf;
63545796c8dcSSimon Schubert if (d == dend)
63555796c8dcSSimon Schubert {
63565796c8dcSSimon Schubert if (dend == end_match_2)
63575796c8dcSSimon Schubert break;
63585796c8dcSSimon Schubert d = string2;
63595796c8dcSSimon Schubert dend = end_match_2;
63605796c8dcSSimon Schubert }
63615796c8dcSSimon Schubert str_buf[i] = TRANSLATE(*(d+i));
63625796c8dcSSimon Schubert str_buf[i+1] = '\0'; /* sentinel */
63635796c8dcSSimon Schubert idx2 = findidx ((const wint_t**)&cp);
63645796c8dcSSimon Schubert }
63655796c8dcSSimon Schubert
63665796c8dcSSimon Schubert /* Update d.  Because d will be incremented again at
63675796c8dcSSimon Schubert char_set_matched:, we advance it by one less here. */
63685796c8dcSSimon Schubert d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
63695796c8dcSSimon Schubert if (d >= dend)
63705796c8dcSSimon Schubert {
63715796c8dcSSimon Schubert if (dend == end_match_2)
63725796c8dcSSimon Schubert d = dend;
63735796c8dcSSimon Schubert else
63745796c8dcSSimon Schubert {
63755796c8dcSSimon Schubert d = string2;
63765796c8dcSSimon Schubert dend = end_match_2;
63775796c8dcSSimon Schubert }
63785796c8dcSSimon Schubert }
63795796c8dcSSimon Schubert
63805796c8dcSSimon Schubert len = weights[idx2];
63815796c8dcSSimon Schubert
63825796c8dcSSimon Schubert for (workp2 = workp + equiv_class_length ; workp < workp2 ;
63835796c8dcSSimon Schubert workp++)
63845796c8dcSSimon Schubert {
63855796c8dcSSimon Schubert idx = (int32_t)*workp;
63865796c8dcSSimon Schubert /* We already checked idx != 0 in regex_compile. */
63875796c8dcSSimon Schubert
63885796c8dcSSimon Schubert if (idx2 != 0 && len == weights[idx])
63895796c8dcSSimon Schubert {
63905796c8dcSSimon Schubert int cnt = 0;
63915796c8dcSSimon Schubert while (cnt < len && (weights[idx + 1 + cnt]
63925796c8dcSSimon Schubert == weights[idx2 + 1 + cnt]))
63935796c8dcSSimon Schubert ++cnt;
63945796c8dcSSimon Schubert
63955796c8dcSSimon Schubert if (cnt == len)
63965796c8dcSSimon Schubert goto char_set_matched;
63975796c8dcSSimon Schubert }
63985796c8dcSSimon Schubert }
63995796c8dcSSimon Schubert /* not matched */
64005796c8dcSSimon Schubert d = backup_d;
64015796c8dcSSimon Schubert dend = backup_dend;
64025796c8dcSSimon Schubert }
64035796c8dcSSimon Schubert else /* (nrules == 0) */
64045796c8dcSSimon Schubert # endif
64055796c8dcSSimon Schubert /* If we can't look up collation data, we use wcscoll
64065796c8dcSSimon Schubert instead. */
64075796c8dcSSimon Schubert {
64085796c8dcSSimon Schubert for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
64095796c8dcSSimon Schubert {
64105796c8dcSSimon Schubert const CHAR_T *backup_d = d, *backup_dend = dend;
64115796c8dcSSimon Schubert # ifdef _LIBC
64125796c8dcSSimon Schubert length = __wcslen (workp);
64135796c8dcSSimon Schubert # else
64145796c8dcSSimon Schubert length = wcslen (workp);
64155796c8dcSSimon Schubert # endif
64165796c8dcSSimon Schubert
64175796c8dcSSimon Schubert /* If wcscoll(the equivalence class, whole string) > 0,
64185796c8dcSSimon Schubert no substring of the string can ever match the
64195796c8dcSSimon Schubert equivalence class. */
64205796c8dcSSimon Schubert # ifdef _LIBC
64215796c8dcSSimon Schubert if (__wcscoll (workp, d) > 0)
64225796c8dcSSimon Schubert # else
64235796c8dcSSimon Schubert if (wcscoll (workp, d) > 0)
64245796c8dcSSimon Schubert # endif
64255796c8dcSSimon Schubert {
64265796c8dcSSimon Schubert workp += length + 1;
64275796c8dcSSimon Schubert break;
64285796c8dcSSimon Schubert }
64295796c8dcSSimon Schubert
64305796c8dcSSimon Schubert /* First, we compare the equivalence class with
64315796c8dcSSimon Schubert the first character of the string.
64325796c8dcSSimon Schubert If it doesn't match, we add the next character to
64335796c8dcSSimon Schubert the compare buffer in turn. */
64345796c8dcSSimon Schubert for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
64355796c8dcSSimon Schubert {
64365796c8dcSSimon Schubert int match;
64375796c8dcSSimon Schubert if (d == dend)
64385796c8dcSSimon Schubert {
64395796c8dcSSimon Schubert if (dend == end_match_2)
64405796c8dcSSimon Schubert break;
64415796c8dcSSimon Schubert d = string2;
64425796c8dcSSimon Schubert dend = end_match_2;
64435796c8dcSSimon Schubert }
64445796c8dcSSimon Schubert
64455796c8dcSSimon Schubert /* add next character to the compare buffer. */
64465796c8dcSSimon Schubert str_buf[i] = TRANSLATE(*d);
64475796c8dcSSimon Schubert str_buf[i+1] = '\0';
64485796c8dcSSimon Schubert
64495796c8dcSSimon Schubert # ifdef _LIBC
64505796c8dcSSimon Schubert match = __wcscoll (workp, str_buf);
64515796c8dcSSimon Schubert # else
64525796c8dcSSimon Schubert match = wcscoll (workp, str_buf);
64535796c8dcSSimon Schubert # endif
64545796c8dcSSimon Schubert
64555796c8dcSSimon Schubert if (match == 0)
64565796c8dcSSimon Schubert goto char_set_matched;
64575796c8dcSSimon Schubert
64585796c8dcSSimon Schubert if (match < 0)
64595796c8dcSSimon Schubert /* (str_buf > workp) indicates (str_buf + X > workp),
64605796c8dcSSimon Schubert because for all X (str_buf + X > str_buf).
64615796c8dcSSimon Schubert So we don't need to continue this loop. */
64625796c8dcSSimon Schubert break;
64635796c8dcSSimon Schubert
64645796c8dcSSimon Schubert /* Otherwise (str_buf < workp),
64655796c8dcSSimon Schubert (str_buf+next_character) may equal (workp).
64665796c8dcSSimon Schubert So we continue this loop. */
64675796c8dcSSimon Schubert }
64685796c8dcSSimon Schubert /* not matched */
64695796c8dcSSimon Schubert d = backup_d;
64705796c8dcSSimon Schubert dend = backup_dend;
64715796c8dcSSimon Schubert workp += length + 1;
64725796c8dcSSimon Schubert }
64735796c8dcSSimon Schubert }
64745796c8dcSSimon Schubert
64755796c8dcSSimon Schubert /* match with char_range? */
64765796c8dcSSimon Schubert # ifdef _LIBC
64775796c8dcSSimon Schubert if (nrules != 0)
64785796c8dcSSimon Schubert {
64795796c8dcSSimon Schubert uint32_t collseqval;
64805796c8dcSSimon Schubert const char *collseq = (const char *)
64815796c8dcSSimon Schubert _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
64825796c8dcSSimon Schubert
64835796c8dcSSimon Schubert collseqval = collseq_table_lookup (collseq, c);
64845796c8dcSSimon Schubert
64855796c8dcSSimon Schubert for (; workp < p - chars_length ;)
64865796c8dcSSimon Schubert {
64875796c8dcSSimon Schubert uint32_t start_val, end_val;
64885796c8dcSSimon Schubert
64895796c8dcSSimon Schubert /* We already compute the collation sequence value
64905796c8dcSSimon Schubert of the characters (or collating symbols). */
64915796c8dcSSimon Schubert start_val = (uint32_t) *workp++; /* range_start */
64925796c8dcSSimon Schubert end_val = (uint32_t) *workp++; /* range_end */
64935796c8dcSSimon Schubert
64945796c8dcSSimon Schubert if (start_val <= collseqval && collseqval <= end_val)
64955796c8dcSSimon Schubert goto char_set_matched;
64965796c8dcSSimon Schubert }
64975796c8dcSSimon Schubert }
64985796c8dcSSimon Schubert else
64995796c8dcSSimon Schubert # endif
65005796c8dcSSimon Schubert {
65015796c8dcSSimon Schubert /* We set range_start_char at str_buf[0], range_end_char
65025796c8dcSSimon Schubert at str_buf[4], and compared char at str_buf[2]. */
65035796c8dcSSimon Schubert str_buf[1] = 0;
65045796c8dcSSimon Schubert str_buf[2] = c;
65055796c8dcSSimon Schubert str_buf[3] = 0;
65065796c8dcSSimon Schubert str_buf[5] = 0;
65075796c8dcSSimon Schubert for (; workp < p - chars_length ;)
65085796c8dcSSimon Schubert {
65095796c8dcSSimon Schubert wchar_t *range_start_char, *range_end_char;
65105796c8dcSSimon Schubert
65115796c8dcSSimon Schubert /* match if (range_start_char <= c <= range_end_char). */
65125796c8dcSSimon Schubert
65135796c8dcSSimon Schubert /* If range_start(or end) < 0, we assume -range_start(end)
65145796c8dcSSimon Schubert is the offset of the collating symbol which is specified
65155796c8dcSSimon Schubert as the character of the range start(end). */
65165796c8dcSSimon Schubert
65175796c8dcSSimon Schubert /* range_start */
65185796c8dcSSimon Schubert if (*workp < 0)
65195796c8dcSSimon Schubert range_start_char = charset_top - (*workp++);
65205796c8dcSSimon Schubert else
65215796c8dcSSimon Schubert {
65225796c8dcSSimon Schubert str_buf[0] = *workp++;
65235796c8dcSSimon Schubert range_start_char = str_buf;
65245796c8dcSSimon Schubert }
65255796c8dcSSimon Schubert
65265796c8dcSSimon Schubert /* range_end */
65275796c8dcSSimon Schubert if (*workp < 0)
65285796c8dcSSimon Schubert range_end_char = charset_top - (*workp++);
65295796c8dcSSimon Schubert else
65305796c8dcSSimon Schubert {
65315796c8dcSSimon Schubert str_buf[4] = *workp++;
65325796c8dcSSimon Schubert range_end_char = str_buf + 4;
65335796c8dcSSimon Schubert }
65345796c8dcSSimon Schubert
65355796c8dcSSimon Schubert # ifdef _LIBC
65365796c8dcSSimon Schubert if (__wcscoll (range_start_char, str_buf+2) <= 0
65375796c8dcSSimon Schubert && __wcscoll (str_buf+2, range_end_char) <= 0)
65385796c8dcSSimon Schubert # else
65395796c8dcSSimon Schubert if (wcscoll (range_start_char, str_buf+2) <= 0
65405796c8dcSSimon Schubert && wcscoll (str_buf+2, range_end_char) <= 0)
65415796c8dcSSimon Schubert # endif
65425796c8dcSSimon Schubert goto char_set_matched;
65435796c8dcSSimon Schubert }
65445796c8dcSSimon Schubert }
65455796c8dcSSimon Schubert
65465796c8dcSSimon Schubert /* match with char? */
65475796c8dcSSimon Schubert for (; workp < p ; workp++)
65485796c8dcSSimon Schubert if (c == *workp)
65495796c8dcSSimon Schubert goto char_set_matched;
65505796c8dcSSimon Schubert
65515796c8dcSSimon Schubert negate = !negate;
65525796c8dcSSimon Schubert
65535796c8dcSSimon Schubert char_set_matched:
65545796c8dcSSimon Schubert if (negate) goto fail;
65555796c8dcSSimon Schubert #else
65565796c8dcSSimon Schubert /* Cast to `unsigned' instead of `unsigned char' in case the
65575796c8dcSSimon Schubert bit list is a full 32 bytes long. */
65585796c8dcSSimon Schubert if (c < (unsigned) (*p * BYTEWIDTH)
65595796c8dcSSimon Schubert && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
65605796c8dcSSimon Schubert negate = !negate;
65615796c8dcSSimon Schubert
65625796c8dcSSimon Schubert p += 1 + *p;
65635796c8dcSSimon Schubert
65645796c8dcSSimon Schubert if (!negate) goto fail;
65655796c8dcSSimon Schubert #undef WORK_BUFFER_SIZE
65665796c8dcSSimon Schubert #endif /* WCHAR */
65675796c8dcSSimon Schubert SET_REGS_MATCHED ();
65685796c8dcSSimon Schubert d++;
65695796c8dcSSimon Schubert break;
65705796c8dcSSimon Schubert }
65715796c8dcSSimon Schubert
65725796c8dcSSimon Schubert
65735796c8dcSSimon Schubert /* The beginning of a group is represented by start_memory.
65745796c8dcSSimon Schubert The arguments are the register number in the next byte, and the
65755796c8dcSSimon Schubert number of groups inner to this one in the next. The text
65765796c8dcSSimon Schubert matched within the group is recorded (in the internal
65775796c8dcSSimon Schubert registers data structure) under the register number. */
65785796c8dcSSimon Schubert case start_memory:
65795796c8dcSSimon Schubert DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
65805796c8dcSSimon Schubert (long int) *p, (long int) p[1]);
65815796c8dcSSimon Schubert
65825796c8dcSSimon Schubert /* Find out if this group can match the empty string. */
65835796c8dcSSimon Schubert p1 = p; /* To send to group_match_null_string_p. */
65845796c8dcSSimon Schubert
65855796c8dcSSimon Schubert if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
65865796c8dcSSimon Schubert REG_MATCH_NULL_STRING_P (reg_info[*p])
65875796c8dcSSimon Schubert = PREFIX(group_match_null_string_p) (&p1, pend, reg_info);
65885796c8dcSSimon Schubert
65895796c8dcSSimon Schubert /* Save the position in the string where we were the last time
65905796c8dcSSimon Schubert we were at this open-group operator in case the group is
65915796c8dcSSimon Schubert operated upon by a repetition operator, e.g., with `(a*)*b'
65925796c8dcSSimon Schubert against `ab'; then we want to ignore where we are now in
65935796c8dcSSimon Schubert the string in case this attempt to match fails. */
65945796c8dcSSimon Schubert old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
65955796c8dcSSimon Schubert ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
65965796c8dcSSimon Schubert : regstart[*p];
65975796c8dcSSimon Schubert DEBUG_PRINT2 (" old_regstart: %d\n",
65985796c8dcSSimon Schubert POINTER_TO_OFFSET (old_regstart[*p]));
65995796c8dcSSimon Schubert
66005796c8dcSSimon Schubert regstart[*p] = d;
66015796c8dcSSimon Schubert DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
66025796c8dcSSimon Schubert
66035796c8dcSSimon Schubert IS_ACTIVE (reg_info[*p]) = 1;
66045796c8dcSSimon Schubert MATCHED_SOMETHING (reg_info[*p]) = 0;
66055796c8dcSSimon Schubert
66065796c8dcSSimon Schubert /* Clear this whenever we change the register activity status. */
66075796c8dcSSimon Schubert set_regs_matched_done = 0;
66085796c8dcSSimon Schubert
66095796c8dcSSimon Schubert /* This is the new highest active register. */
66105796c8dcSSimon Schubert highest_active_reg = *p;
66115796c8dcSSimon Schubert
66125796c8dcSSimon Schubert /* If nothing was active before, this is the new lowest active
66135796c8dcSSimon Schubert register. */
66145796c8dcSSimon Schubert if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
66155796c8dcSSimon Schubert lowest_active_reg = *p;
66165796c8dcSSimon Schubert
66175796c8dcSSimon Schubert /* Move past the register number and inner group count. */
66185796c8dcSSimon Schubert p += 2;
66195796c8dcSSimon Schubert just_past_start_mem = p;
66205796c8dcSSimon Schubert
66215796c8dcSSimon Schubert break;
66225796c8dcSSimon Schubert
66235796c8dcSSimon Schubert
66245796c8dcSSimon Schubert /* The stop_memory opcode represents the end of a group. Its
66255796c8dcSSimon Schubert arguments are the same as start_memory's: the register
66265796c8dcSSimon Schubert number, and the number of inner groups. */
66275796c8dcSSimon Schubert case stop_memory:
66285796c8dcSSimon Schubert DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
66295796c8dcSSimon Schubert (long int) *p, (long int) p[1]);
66305796c8dcSSimon Schubert
66315796c8dcSSimon Schubert /* We need to save the string position the last time we were at
66325796c8dcSSimon Schubert this close-group operator in case the group is operated
66335796c8dcSSimon Schubert upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
66345796c8dcSSimon Schubert against `aba'; then we want to ignore where we are now in
66355796c8dcSSimon Schubert the string in case this attempt to match fails. */
66365796c8dcSSimon Schubert old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
66375796c8dcSSimon Schubert ? REG_UNSET (regend[*p]) ? d : regend[*p]
66385796c8dcSSimon Schubert : regend[*p];
66395796c8dcSSimon Schubert DEBUG_PRINT2 (" old_regend: %d\n",
66405796c8dcSSimon Schubert POINTER_TO_OFFSET (old_regend[*p]));
66415796c8dcSSimon Schubert
66425796c8dcSSimon Schubert regend[*p] = d;
66435796c8dcSSimon Schubert DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
66445796c8dcSSimon Schubert
66455796c8dcSSimon Schubert /* This register isn't active anymore. */
66465796c8dcSSimon Schubert IS_ACTIVE (reg_info[*p]) = 0;
66475796c8dcSSimon Schubert
66485796c8dcSSimon Schubert /* Clear this whenever we change the register activity status. */
66495796c8dcSSimon Schubert set_regs_matched_done = 0;
66505796c8dcSSimon Schubert
66515796c8dcSSimon Schubert /* If this was the only register active, nothing is active
66525796c8dcSSimon Schubert anymore. */
66535796c8dcSSimon Schubert if (lowest_active_reg == highest_active_reg)
66545796c8dcSSimon Schubert {
66555796c8dcSSimon Schubert lowest_active_reg = NO_LOWEST_ACTIVE_REG;
66565796c8dcSSimon Schubert highest_active_reg = NO_HIGHEST_ACTIVE_REG;
66575796c8dcSSimon Schubert }
66585796c8dcSSimon Schubert else
66595796c8dcSSimon Schubert { /* We must scan for the new highest active register, since
66605796c8dcSSimon Schubert it isn't necessarily one less than now: consider
66615796c8dcSSimon Schubert (a(b)c(d(e)f)g). When group 3 ends, after the f), the
66625796c8dcSSimon Schubert new highest active register is 1. */
66635796c8dcSSimon Schubert UCHAR_T r = *p - 1;
66645796c8dcSSimon Schubert while (r > 0 && !IS_ACTIVE (reg_info[r]))
66655796c8dcSSimon Schubert r--;
66665796c8dcSSimon Schubert
66675796c8dcSSimon Schubert /* If we end up at register zero, that means that we saved
66685796c8dcSSimon Schubert the registers as the result of an `on_failure_jump', not
66695796c8dcSSimon Schubert a `start_memory', and we jumped to past the innermost
66705796c8dcSSimon Schubert `stop_memory'. For example, in ((.)*) we save
66715796c8dcSSimon Schubert registers 1 and 2 as a result of the *, but when we pop
66725796c8dcSSimon Schubert back to the second ), we are at the stop_memory 1.
66735796c8dcSSimon Schubert Thus, nothing is active. */
66745796c8dcSSimon Schubert if (r == 0)
66755796c8dcSSimon Schubert {
66765796c8dcSSimon Schubert lowest_active_reg = NO_LOWEST_ACTIVE_REG;
66775796c8dcSSimon Schubert highest_active_reg = NO_HIGHEST_ACTIVE_REG;
66785796c8dcSSimon Schubert }
66795796c8dcSSimon Schubert else
66805796c8dcSSimon Schubert highest_active_reg = r;
66815796c8dcSSimon Schubert }
66825796c8dcSSimon Schubert
66835796c8dcSSimon Schubert /* If just failed to match something this time around with a
66845796c8dcSSimon Schubert group that's operated on by a repetition operator, try to
66855796c8dcSSimon Schubert force exit from the ``loop'', and restore the register
66865796c8dcSSimon Schubert information for this group that we had before trying this
66875796c8dcSSimon Schubert last match. */
66885796c8dcSSimon Schubert if ((!MATCHED_SOMETHING (reg_info[*p])
66895796c8dcSSimon Schubert || just_past_start_mem == p - 1)
66905796c8dcSSimon Schubert && (p + 2) < pend)
66915796c8dcSSimon Schubert {
66925796c8dcSSimon Schubert boolean is_a_jump_n = false;
66935796c8dcSSimon Schubert
66945796c8dcSSimon Schubert p1 = p + 2;
66955796c8dcSSimon Schubert mcnt = 0;
66965796c8dcSSimon Schubert switch ((re_opcode_t) *p1++)
66975796c8dcSSimon Schubert {
66985796c8dcSSimon Schubert case jump_n:
66995796c8dcSSimon Schubert is_a_jump_n = true;
67005796c8dcSSimon Schubert case pop_failure_jump:
67015796c8dcSSimon Schubert case maybe_pop_jump:
67025796c8dcSSimon Schubert case jump:
67035796c8dcSSimon Schubert case dummy_failure_jump:
67045796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (mcnt, p1);
67055796c8dcSSimon Schubert if (is_a_jump_n)
67065796c8dcSSimon Schubert p1 += OFFSET_ADDRESS_SIZE;
67075796c8dcSSimon Schubert break;
67085796c8dcSSimon Schubert
67095796c8dcSSimon Schubert default:
67105796c8dcSSimon Schubert /* do nothing */ ;
67115796c8dcSSimon Schubert }
67125796c8dcSSimon Schubert p1 += mcnt;
67135796c8dcSSimon Schubert
67145796c8dcSSimon Schubert /* If the next operation is a jump backwards in the pattern
67155796c8dcSSimon Schubert to an on_failure_jump right before the start_memory
67165796c8dcSSimon Schubert corresponding to this stop_memory, exit from the loop
67175796c8dcSSimon Schubert by forcing a failure after pushing on the stack the
67185796c8dcSSimon Schubert on_failure_jump's jump in the pattern, and d. */
67195796c8dcSSimon Schubert if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
67205796c8dcSSimon Schubert && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
67215796c8dcSSimon Schubert && p1[2+OFFSET_ADDRESS_SIZE] == *p)
67225796c8dcSSimon Schubert {
67235796c8dcSSimon Schubert /* If this group ever matched anything, then restore
67245796c8dcSSimon Schubert what its registers were before trying this last
67255796c8dcSSimon Schubert failed match, e.g., with `(a*)*b' against `ab' for
67265796c8dcSSimon Schubert regstart[1], and, e.g., with `((a*)*(b*)*)*'
67275796c8dcSSimon Schubert against `aba' for regend[3].
67285796c8dcSSimon Schubert
67295796c8dcSSimon Schubert Also restore the registers for inner groups for,
67305796c8dcSSimon Schubert e.g., `((a*)(b*))*' against `aba' (register 3 would
67315796c8dcSSimon Schubert otherwise get trashed). */
67325796c8dcSSimon Schubert
67335796c8dcSSimon Schubert if (EVER_MATCHED_SOMETHING (reg_info[*p]))
67345796c8dcSSimon Schubert {
67355796c8dcSSimon Schubert unsigned r;
67365796c8dcSSimon Schubert
67375796c8dcSSimon Schubert EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
67385796c8dcSSimon Schubert
67395796c8dcSSimon Schubert /* Restore this and inner groups' (if any) registers. */
67405796c8dcSSimon Schubert for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
67415796c8dcSSimon Schubert r++)
67425796c8dcSSimon Schubert {
67435796c8dcSSimon Schubert regstart[r] = old_regstart[r];
67445796c8dcSSimon Schubert
67455796c8dcSSimon Schubert /* xx why this test? */
67465796c8dcSSimon Schubert if (old_regend[r] >= regstart[r])
67475796c8dcSSimon Schubert regend[r] = old_regend[r];
67485796c8dcSSimon Schubert }
67495796c8dcSSimon Schubert }
67505796c8dcSSimon Schubert p1++;
67515796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (mcnt, p1);
67525796c8dcSSimon Schubert PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
67535796c8dcSSimon Schubert
67545796c8dcSSimon Schubert goto fail;
67555796c8dcSSimon Schubert }
67565796c8dcSSimon Schubert }
67575796c8dcSSimon Schubert
67585796c8dcSSimon Schubert /* Move past the register number and the inner group count. */
67595796c8dcSSimon Schubert p += 2;
67605796c8dcSSimon Schubert break;
67615796c8dcSSimon Schubert
67625796c8dcSSimon Schubert
67635796c8dcSSimon Schubert /* \<digit> has been turned into a `duplicate' command which is
67645796c8dcSSimon Schubert followed by the numeric value of <digit> as the register number. */
67655796c8dcSSimon Schubert case duplicate:
67665796c8dcSSimon Schubert {
67675796c8dcSSimon Schubert register const CHAR_T *d2, *dend2;
67685796c8dcSSimon Schubert int regno = *p++; /* Get which register to match against. */
67695796c8dcSSimon Schubert DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
67705796c8dcSSimon Schubert
67715796c8dcSSimon Schubert /* Can't back reference a group which we've never matched. */
67725796c8dcSSimon Schubert if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
67735796c8dcSSimon Schubert goto fail;
67745796c8dcSSimon Schubert
67755796c8dcSSimon Schubert /* Where in input to try to start matching. */
67765796c8dcSSimon Schubert d2 = regstart[regno];
67775796c8dcSSimon Schubert
67785796c8dcSSimon Schubert /* Where to stop matching; if both the place to start and
67795796c8dcSSimon Schubert the place to stop matching are in the same string, then
67805796c8dcSSimon Schubert set to the place to stop, otherwise, for now have to use
67815796c8dcSSimon Schubert the end of the first string. */
67825796c8dcSSimon Schubert
67835796c8dcSSimon Schubert dend2 = ((FIRST_STRING_P (regstart[regno])
67845796c8dcSSimon Schubert == FIRST_STRING_P (regend[regno]))
67855796c8dcSSimon Schubert ? regend[regno] : end_match_1);
67865796c8dcSSimon Schubert for (;;)
67875796c8dcSSimon Schubert {
67885796c8dcSSimon Schubert /* If necessary, advance to next segment in register
67895796c8dcSSimon Schubert contents. */
67905796c8dcSSimon Schubert while (d2 == dend2)
67915796c8dcSSimon Schubert {
67925796c8dcSSimon Schubert if (dend2 == end_match_2) break;
67935796c8dcSSimon Schubert if (dend2 == regend[regno]) break;
67945796c8dcSSimon Schubert
67955796c8dcSSimon Schubert /* End of string1 => advance to string2. */
67965796c8dcSSimon Schubert d2 = string2;
67975796c8dcSSimon Schubert dend2 = regend[regno];
67985796c8dcSSimon Schubert }
67995796c8dcSSimon Schubert /* At end of register contents => success */
68005796c8dcSSimon Schubert if (d2 == dend2) break;
68015796c8dcSSimon Schubert
68025796c8dcSSimon Schubert /* If necessary, advance to next segment in data. */
68035796c8dcSSimon Schubert PREFETCH ();
68045796c8dcSSimon Schubert
68055796c8dcSSimon Schubert /* How many characters left in this segment to match. */
68065796c8dcSSimon Schubert mcnt = dend - d;
68075796c8dcSSimon Schubert
68085796c8dcSSimon Schubert /* Want how many consecutive characters we can match in
68095796c8dcSSimon Schubert one shot, so, if necessary, adjust the count. */
68105796c8dcSSimon Schubert if (mcnt > dend2 - d2)
68115796c8dcSSimon Schubert mcnt = dend2 - d2;
68125796c8dcSSimon Schubert
68135796c8dcSSimon Schubert /* Compare that many; failure if mismatch, else move
68145796c8dcSSimon Schubert past them. */
68155796c8dcSSimon Schubert if (translate
68165796c8dcSSimon Schubert ? PREFIX(bcmp_translate) (d, d2, mcnt, translate)
68175796c8dcSSimon Schubert : memcmp (d, d2, mcnt*sizeof(UCHAR_T)))
68185796c8dcSSimon Schubert goto fail;
68195796c8dcSSimon Schubert d += mcnt, d2 += mcnt;
68205796c8dcSSimon Schubert
68215796c8dcSSimon Schubert /* Do this because we've matched some characters. */
68225796c8dcSSimon Schubert SET_REGS_MATCHED ();
68235796c8dcSSimon Schubert }
68245796c8dcSSimon Schubert }
68255796c8dcSSimon Schubert break;
68265796c8dcSSimon Schubert
68275796c8dcSSimon Schubert
68285796c8dcSSimon Schubert /* begline matches the empty string at the beginning of the string
68295796c8dcSSimon Schubert (unless `not_bol' is set in `bufp'), and, if
68305796c8dcSSimon Schubert `newline_anchor' is set, after newlines. */
68315796c8dcSSimon Schubert case begline:
68325796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING begline.\n");
68335796c8dcSSimon Schubert
68345796c8dcSSimon Schubert if (AT_STRINGS_BEG (d))
68355796c8dcSSimon Schubert {
68365796c8dcSSimon Schubert if (!bufp->not_bol) break;
68375796c8dcSSimon Schubert }
68385796c8dcSSimon Schubert else if (d[-1] == '\n' && bufp->newline_anchor)
68395796c8dcSSimon Schubert {
68405796c8dcSSimon Schubert break;
68415796c8dcSSimon Schubert }
68425796c8dcSSimon Schubert /* In all other cases, we fail. */
68435796c8dcSSimon Schubert goto fail;
68445796c8dcSSimon Schubert
68455796c8dcSSimon Schubert
68465796c8dcSSimon Schubert /* endline is the dual of begline. */
68475796c8dcSSimon Schubert case endline:
68485796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING endline.\n");
68495796c8dcSSimon Schubert
68505796c8dcSSimon Schubert if (AT_STRINGS_END (d))
68515796c8dcSSimon Schubert {
68525796c8dcSSimon Schubert if (!bufp->not_eol) break;
68535796c8dcSSimon Schubert }
68545796c8dcSSimon Schubert
68555796c8dcSSimon Schubert /* We have to ``prefetch'' the next character. */
68565796c8dcSSimon Schubert else if ((d == end1 ? *string2 : *d) == '\n'
68575796c8dcSSimon Schubert && bufp->newline_anchor)
68585796c8dcSSimon Schubert {
68595796c8dcSSimon Schubert break;
68605796c8dcSSimon Schubert }
68615796c8dcSSimon Schubert goto fail;
68625796c8dcSSimon Schubert
68635796c8dcSSimon Schubert
68645796c8dcSSimon Schubert /* Match at the very beginning of the data. */
68655796c8dcSSimon Schubert case begbuf:
68665796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING begbuf.\n");
68675796c8dcSSimon Schubert if (AT_STRINGS_BEG (d))
68685796c8dcSSimon Schubert break;
68695796c8dcSSimon Schubert goto fail;
68705796c8dcSSimon Schubert
68715796c8dcSSimon Schubert
68725796c8dcSSimon Schubert /* Match at the very end of the data. */
68735796c8dcSSimon Schubert case endbuf:
68745796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING endbuf.\n");
68755796c8dcSSimon Schubert if (AT_STRINGS_END (d))
68765796c8dcSSimon Schubert break;
68775796c8dcSSimon Schubert goto fail;
68785796c8dcSSimon Schubert
68795796c8dcSSimon Schubert
68805796c8dcSSimon Schubert /* on_failure_keep_string_jump is used to optimize `.*\n'. It
68815796c8dcSSimon Schubert pushes NULL as the value for the string on the stack. Then
68825796c8dcSSimon Schubert `pop_failure_point' will keep the current value for the
68835796c8dcSSimon Schubert string, instead of restoring it. To see why, consider
68845796c8dcSSimon Schubert matching `foo\nbar' against `.*\n'. The .* matches the foo;
68855796c8dcSSimon Schubert then the . fails against the \n. But the next thing we want
68865796c8dcSSimon Schubert to do is match the \n against the \n; if we restored the
68875796c8dcSSimon Schubert string value, we would be back at the foo.
68885796c8dcSSimon Schubert
68895796c8dcSSimon Schubert Because this is used only in specific cases, we don't need to
68905796c8dcSSimon Schubert check all the things that `on_failure_jump' does, to make
68915796c8dcSSimon Schubert sure the right things get saved on the stack. Hence we don't
68925796c8dcSSimon Schubert share its code. The only reason to push anything on the
68935796c8dcSSimon Schubert stack at all is that otherwise we would have to change
68945796c8dcSSimon Schubert `anychar's code to do something besides goto fail in this
68955796c8dcSSimon Schubert case; that seems worse than this. */
68965796c8dcSSimon Schubert case on_failure_keep_string_jump:
68975796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
68985796c8dcSSimon Schubert
68995796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (mcnt, p);
69005796c8dcSSimon Schubert #ifdef _LIBC
69015796c8dcSSimon Schubert DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
69025796c8dcSSimon Schubert #else
69035796c8dcSSimon Schubert DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
69045796c8dcSSimon Schubert #endif
69055796c8dcSSimon Schubert
69065796c8dcSSimon Schubert PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
69075796c8dcSSimon Schubert break;
69085796c8dcSSimon Schubert
69095796c8dcSSimon Schubert
69105796c8dcSSimon Schubert /* Uses of on_failure_jump:
69115796c8dcSSimon Schubert
69125796c8dcSSimon Schubert Each alternative starts with an on_failure_jump that points
69135796c8dcSSimon Schubert to the beginning of the next alternative. Each alternative
69145796c8dcSSimon Schubert except the last ends with a jump that in effect jumps past
69155796c8dcSSimon Schubert the rest of the alternatives. (They really jump to the
69165796c8dcSSimon Schubert ending jump of the following alternative, because tensioning
69175796c8dcSSimon Schubert these jumps is a hassle.)
69185796c8dcSSimon Schubert
69195796c8dcSSimon Schubert Repeats start with an on_failure_jump that points past both
69205796c8dcSSimon Schubert the repetition text and either the following jump or
69215796c8dcSSimon Schubert pop_failure_jump back to this on_failure_jump. */
69225796c8dcSSimon Schubert case on_failure_jump:
69235796c8dcSSimon Schubert on_failure:
69245796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING on_failure_jump");
69255796c8dcSSimon Schubert
69265796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (mcnt, p);
69275796c8dcSSimon Schubert #ifdef _LIBC
69285796c8dcSSimon Schubert DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
69295796c8dcSSimon Schubert #else
69305796c8dcSSimon Schubert DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
69315796c8dcSSimon Schubert #endif
69325796c8dcSSimon Schubert
69335796c8dcSSimon Schubert /* If this on_failure_jump comes right before a group (i.e.,
69345796c8dcSSimon Schubert the original * applied to a group), save the information
69355796c8dcSSimon Schubert for that group and all inner ones, so that if we fail back
69365796c8dcSSimon Schubert to this point, the group's information will be correct.
69375796c8dcSSimon Schubert For example, in \(a*\)*\1, we need the preceding group,
69385796c8dcSSimon Schubert and in \(zz\(a*\)b*\)\2, we need the inner group. */
69395796c8dcSSimon Schubert
69405796c8dcSSimon Schubert /* We can't use `p' to check ahead because we push
69415796c8dcSSimon Schubert a failure point to `p + mcnt' after we do this. */
69425796c8dcSSimon Schubert p1 = p;
69435796c8dcSSimon Schubert
69445796c8dcSSimon Schubert /* We need to skip no_op's before we look for the
69455796c8dcSSimon Schubert start_memory in case this on_failure_jump is happening as
69465796c8dcSSimon Schubert the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
69475796c8dcSSimon Schubert against aba. */
69485796c8dcSSimon Schubert while (p1 < pend && (re_opcode_t) *p1 == no_op)
69495796c8dcSSimon Schubert p1++;
69505796c8dcSSimon Schubert
69515796c8dcSSimon Schubert if (p1 < pend && (re_opcode_t) *p1 == start_memory)
69525796c8dcSSimon Schubert {
69535796c8dcSSimon Schubert /* We have a new highest active register now. This will
69545796c8dcSSimon Schubert get reset at the start_memory we are about to get to,
69555796c8dcSSimon Schubert but we will have saved all the registers relevant to
69565796c8dcSSimon Schubert this repetition op, as described above. */
69575796c8dcSSimon Schubert highest_active_reg = *(p1 + 1) + *(p1 + 2);
69585796c8dcSSimon Schubert if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
69595796c8dcSSimon Schubert lowest_active_reg = *(p1 + 1);
69605796c8dcSSimon Schubert }
69615796c8dcSSimon Schubert
69625796c8dcSSimon Schubert DEBUG_PRINT1 (":\n");
69635796c8dcSSimon Schubert PUSH_FAILURE_POINT (p + mcnt, d, -2);
69645796c8dcSSimon Schubert break;
69655796c8dcSSimon Schubert
69665796c8dcSSimon Schubert
69675796c8dcSSimon Schubert /* A smart repeat ends with `maybe_pop_jump'.
69685796c8dcSSimon Schubert We change it to either `pop_failure_jump' or `jump'. */
69695796c8dcSSimon Schubert case maybe_pop_jump:
69705796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (mcnt, p);
69715796c8dcSSimon Schubert DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
69725796c8dcSSimon Schubert {
69735796c8dcSSimon Schubert register UCHAR_T *p2 = p;
69745796c8dcSSimon Schubert
69755796c8dcSSimon Schubert /* Compare the beginning of the repeat with what in the
69765796c8dcSSimon Schubert pattern follows its end. If we can establish that there
69775796c8dcSSimon Schubert is nothing that they would both match, i.e., nothing that
69785796c8dcSSimon Schubert would force us to backtrack (as there is in, e.g., `a*a'),
69795796c8dcSSimon Schubert then we can change to pop_failure_jump, because we'll
69805796c8dcSSimon Schubert never have to backtrack.
69815796c8dcSSimon Schubert
69825796c8dcSSimon Schubert This is not true in the case of alternatives: in
69835796c8dcSSimon Schubert `(a|ab)*' we do need to backtrack to the `ab' alternative
69845796c8dcSSimon Schubert (e.g., if the string was `ab'). But instead of trying to
69855796c8dcSSimon Schubert detect that here, the alternative has put on a dummy
69865796c8dcSSimon Schubert failure point which is what we will end up popping. */
69875796c8dcSSimon Schubert
69885796c8dcSSimon Schubert /* Skip over open/close-group commands.
69895796c8dcSSimon Schubert If what follows this loop is a ...+ construct,
69905796c8dcSSimon Schubert look at what begins its body, since we will have to
69915796c8dcSSimon Schubert match at least one of that. */
69925796c8dcSSimon Schubert while (1)
69935796c8dcSSimon Schubert {
69945796c8dcSSimon Schubert if (p2 + 2 < pend
69955796c8dcSSimon Schubert && ((re_opcode_t) *p2 == stop_memory
69965796c8dcSSimon Schubert || (re_opcode_t) *p2 == start_memory))
69975796c8dcSSimon Schubert p2 += 3;
69985796c8dcSSimon Schubert else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
69995796c8dcSSimon Schubert && (re_opcode_t) *p2 == dummy_failure_jump)
70005796c8dcSSimon Schubert p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
70015796c8dcSSimon Schubert else
70025796c8dcSSimon Schubert break;
70035796c8dcSSimon Schubert }
70045796c8dcSSimon Schubert
70055796c8dcSSimon Schubert p1 = p + mcnt;
70065796c8dcSSimon Schubert /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
70075796c8dcSSimon Schubert to the `maybe_pop_jump' of this case. Examine what
70085796c8dcSSimon Schubert follows. */
70095796c8dcSSimon Schubert
70105796c8dcSSimon Schubert /* If we're at the end of the pattern, we can change. */
70115796c8dcSSimon Schubert if (p2 == pend)
70125796c8dcSSimon Schubert {
70135796c8dcSSimon Schubert /* Consider what happens when matching ":\(.*\)"
70145796c8dcSSimon Schubert against ":/". I don't really understand this code
70155796c8dcSSimon Schubert yet. */
70165796c8dcSSimon Schubert p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
70175796c8dcSSimon Schubert pop_failure_jump;
70185796c8dcSSimon Schubert DEBUG_PRINT1
70195796c8dcSSimon Schubert (" End of pattern: change to `pop_failure_jump'.\n");
70205796c8dcSSimon Schubert }
70215796c8dcSSimon Schubert
70225796c8dcSSimon Schubert else if ((re_opcode_t) *p2 == exactn
70235796c8dcSSimon Schubert #ifdef MBS_SUPPORT
70245796c8dcSSimon Schubert || (re_opcode_t) *p2 == exactn_bin
70255796c8dcSSimon Schubert #endif
70265796c8dcSSimon Schubert || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
70275796c8dcSSimon Schubert {
70285796c8dcSSimon Schubert register UCHAR_T c
70295796c8dcSSimon Schubert = *p2 == (UCHAR_T) endline ? '\n' : p2[2];
70305796c8dcSSimon Schubert
70315796c8dcSSimon Schubert if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
70325796c8dcSSimon Schubert #ifdef MBS_SUPPORT
70335796c8dcSSimon Schubert || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
70345796c8dcSSimon Schubert #endif
70355796c8dcSSimon Schubert ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
70365796c8dcSSimon Schubert {
70375796c8dcSSimon Schubert p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
70385796c8dcSSimon Schubert pop_failure_jump;
70395796c8dcSSimon Schubert #ifdef WCHAR
70405796c8dcSSimon Schubert DEBUG_PRINT3 (" %C != %C => pop_failure_jump.\n",
70415796c8dcSSimon Schubert (wint_t) c,
70425796c8dcSSimon Schubert (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
70435796c8dcSSimon Schubert #else
70445796c8dcSSimon Schubert DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
70455796c8dcSSimon Schubert (char) c,
70465796c8dcSSimon Schubert (char) p1[3+OFFSET_ADDRESS_SIZE]);
70475796c8dcSSimon Schubert #endif
70485796c8dcSSimon Schubert }
70495796c8dcSSimon Schubert
70505796c8dcSSimon Schubert #ifndef WCHAR
70515796c8dcSSimon Schubert else if ((re_opcode_t) p1[3] == charset
70525796c8dcSSimon Schubert || (re_opcode_t) p1[3] == charset_not)
70535796c8dcSSimon Schubert {
70545796c8dcSSimon Schubert int negate = (re_opcode_t) p1[3] == charset_not;
70555796c8dcSSimon Schubert
70565796c8dcSSimon Schubert if (c < (unsigned) (p1[4] * BYTEWIDTH)
70575796c8dcSSimon Schubert && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
70585796c8dcSSimon Schubert negate = !negate;
70595796c8dcSSimon Schubert
70605796c8dcSSimon Schubert /* `negate' is equal to 1 if c would match, which means
70615796c8dcSSimon Schubert that we can't change to pop_failure_jump. */
70625796c8dcSSimon Schubert if (!negate)
70635796c8dcSSimon Schubert {
70645796c8dcSSimon Schubert p[-3] = (unsigned char) pop_failure_jump;
70655796c8dcSSimon Schubert DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
70665796c8dcSSimon Schubert }
70675796c8dcSSimon Schubert }
70685796c8dcSSimon Schubert #endif /* not WCHAR */
70695796c8dcSSimon Schubert }
70705796c8dcSSimon Schubert #ifndef WCHAR
70715796c8dcSSimon Schubert else if ((re_opcode_t) *p2 == charset)
70725796c8dcSSimon Schubert {
70735796c8dcSSimon Schubert /* We win if the first character of the loop is not part
70745796c8dcSSimon Schubert of the charset. */
70755796c8dcSSimon Schubert if ((re_opcode_t) p1[3] == exactn
70765796c8dcSSimon Schubert && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
70775796c8dcSSimon Schubert && (p2[2 + p1[5] / BYTEWIDTH]
70785796c8dcSSimon Schubert & (1 << (p1[5] % BYTEWIDTH)))))
70795796c8dcSSimon Schubert {
70805796c8dcSSimon Schubert p[-3] = (unsigned char) pop_failure_jump;
70815796c8dcSSimon Schubert DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
70825796c8dcSSimon Schubert }
70835796c8dcSSimon Schubert
70845796c8dcSSimon Schubert else if ((re_opcode_t) p1[3] == charset_not)
70855796c8dcSSimon Schubert {
70865796c8dcSSimon Schubert int idx;
70875796c8dcSSimon Schubert /* We win if the charset_not inside the loop
70885796c8dcSSimon Schubert lists every character listed in the charset after. */
70895796c8dcSSimon Schubert for (idx = 0; idx < (int) p2[1]; idx++)
70905796c8dcSSimon Schubert if (! (p2[2 + idx] == 0
70915796c8dcSSimon Schubert || (idx < (int) p1[4]
70925796c8dcSSimon Schubert && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
70935796c8dcSSimon Schubert break;
70945796c8dcSSimon Schubert
70955796c8dcSSimon Schubert if (idx == p2[1])
70965796c8dcSSimon Schubert {
70975796c8dcSSimon Schubert p[-3] = (unsigned char) pop_failure_jump;
70985796c8dcSSimon Schubert DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
70995796c8dcSSimon Schubert }
71005796c8dcSSimon Schubert }
71015796c8dcSSimon Schubert else if ((re_opcode_t) p1[3] == charset)
71025796c8dcSSimon Schubert {
71035796c8dcSSimon Schubert int idx;
71045796c8dcSSimon Schubert /* We win if the charset inside the loop
71055796c8dcSSimon Schubert has no overlap with the one after the loop. */
71065796c8dcSSimon Schubert for (idx = 0;
71075796c8dcSSimon Schubert idx < (int) p2[1] && idx < (int) p1[4];
71085796c8dcSSimon Schubert idx++)
71095796c8dcSSimon Schubert if ((p2[2 + idx] & p1[5 + idx]) != 0)
71105796c8dcSSimon Schubert break;
71115796c8dcSSimon Schubert
71125796c8dcSSimon Schubert if (idx == p2[1] || idx == p1[4])
71135796c8dcSSimon Schubert {
71145796c8dcSSimon Schubert p[-3] = (unsigned char) pop_failure_jump;
71155796c8dcSSimon Schubert DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
71165796c8dcSSimon Schubert }
71175796c8dcSSimon Schubert }
71185796c8dcSSimon Schubert }
71195796c8dcSSimon Schubert #endif /* not WCHAR */
71205796c8dcSSimon Schubert }
71215796c8dcSSimon Schubert p -= OFFSET_ADDRESS_SIZE; /* Point at relative address again. */
71225796c8dcSSimon Schubert if ((re_opcode_t) p[-1] != pop_failure_jump)
71235796c8dcSSimon Schubert {
71245796c8dcSSimon Schubert p[-1] = (UCHAR_T) jump;
71255796c8dcSSimon Schubert DEBUG_PRINT1 (" Match => jump.\n");
71265796c8dcSSimon Schubert goto unconditional_jump;
71275796c8dcSSimon Schubert }
71285796c8dcSSimon Schubert /* Note fall through. */
71295796c8dcSSimon Schubert
71305796c8dcSSimon Schubert
71315796c8dcSSimon Schubert /* The end of a simple repeat has a pop_failure_jump back to
71325796c8dcSSimon Schubert its matching on_failure_jump, where the latter will push a
71335796c8dcSSimon Schubert failure point. The pop_failure_jump takes off failure
71345796c8dcSSimon Schubert points put on by this pop_failure_jump's matching
71355796c8dcSSimon Schubert on_failure_jump; we got through the pattern to here from the
71365796c8dcSSimon Schubert matching on_failure_jump, so didn't fail. */
71375796c8dcSSimon Schubert case pop_failure_jump:
71385796c8dcSSimon Schubert {
71395796c8dcSSimon Schubert /* We need to pass separate storage for the lowest and
71405796c8dcSSimon Schubert highest registers, even though we don't care about the
71415796c8dcSSimon Schubert actual values. Otherwise, we will restore only one
71425796c8dcSSimon Schubert register from the stack, since lowest will == highest in
71435796c8dcSSimon Schubert `pop_failure_point'. */
71445796c8dcSSimon Schubert active_reg_t dummy_low_reg, dummy_high_reg;
7145cf7f2e2dSJohn Marino UCHAR_T *pdummy ATTRIBUTE_UNUSED = NULL;
7146cf7f2e2dSJohn Marino const CHAR_T *sdummy ATTRIBUTE_UNUSED = NULL;
71475796c8dcSSimon Schubert
71485796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
71495796c8dcSSimon Schubert POP_FAILURE_POINT (sdummy, pdummy,
71505796c8dcSSimon Schubert dummy_low_reg, dummy_high_reg,
71515796c8dcSSimon Schubert reg_dummy, reg_dummy, reg_info_dummy);
71525796c8dcSSimon Schubert }
71535796c8dcSSimon Schubert /* Note fall through. */
71545796c8dcSSimon Schubert
71555796c8dcSSimon Schubert unconditional_jump:
71565796c8dcSSimon Schubert #ifdef _LIBC
71575796c8dcSSimon Schubert DEBUG_PRINT2 ("\n%p: ", p);
71585796c8dcSSimon Schubert #else
71595796c8dcSSimon Schubert DEBUG_PRINT2 ("\n0x%x: ", p);
71605796c8dcSSimon Schubert #endif
71615796c8dcSSimon Schubert /* Note fall through. */
71625796c8dcSSimon Schubert
71635796c8dcSSimon Schubert /* Unconditionally jump (without popping any failure points). */
71645796c8dcSSimon Schubert case jump:
71655796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
71665796c8dcSSimon Schubert DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
71675796c8dcSSimon Schubert p += mcnt; /* Do the jump. */
71685796c8dcSSimon Schubert #ifdef _LIBC
71695796c8dcSSimon Schubert DEBUG_PRINT2 ("(to %p).\n", p);
71705796c8dcSSimon Schubert #else
71715796c8dcSSimon Schubert DEBUG_PRINT2 ("(to 0x%x).\n", p);
71725796c8dcSSimon Schubert #endif
71735796c8dcSSimon Schubert break;
71745796c8dcSSimon Schubert
71755796c8dcSSimon Schubert
71765796c8dcSSimon Schubert /* We need this opcode so we can detect where alternatives end
71775796c8dcSSimon Schubert in `group_match_null_string_p' et al. */
71785796c8dcSSimon Schubert case jump_past_alt:
71795796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
71805796c8dcSSimon Schubert goto unconditional_jump;
71815796c8dcSSimon Schubert
71825796c8dcSSimon Schubert
71835796c8dcSSimon Schubert /* Normally, the on_failure_jump pushes a failure point, which
71845796c8dcSSimon Schubert then gets popped at pop_failure_jump. We will end up at
71855796c8dcSSimon Schubert pop_failure_jump, also, and with a pattern of, say, `a+', we
71865796c8dcSSimon Schubert are skipping over the on_failure_jump, so we have to push
71875796c8dcSSimon Schubert something meaningless for pop_failure_jump to pop. */
71885796c8dcSSimon Schubert case dummy_failure_jump:
71895796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
71905796c8dcSSimon Schubert /* It doesn't matter what we push for the string here. What
71915796c8dcSSimon Schubert the code at `fail' tests is the value for the pattern. */
71925796c8dcSSimon Schubert PUSH_FAILURE_POINT (NULL, NULL, -2);
71935796c8dcSSimon Schubert goto unconditional_jump;
71945796c8dcSSimon Schubert
71955796c8dcSSimon Schubert
71965796c8dcSSimon Schubert /* At the end of an alternative, we need to push a dummy failure
71975796c8dcSSimon Schubert point in case we are followed by a `pop_failure_jump', because
71985796c8dcSSimon Schubert we don't want the failure point for the alternative to be
71995796c8dcSSimon Schubert popped. For example, matching `(a|ab)*' against `aab'
72005796c8dcSSimon Schubert requires that we match the `ab' alternative. */
72015796c8dcSSimon Schubert case push_dummy_failure:
72025796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
72035796c8dcSSimon Schubert /* See comments just above at `dummy_failure_jump' about the
72045796c8dcSSimon Schubert two zeroes. */
72055796c8dcSSimon Schubert PUSH_FAILURE_POINT (NULL, NULL, -2);
72065796c8dcSSimon Schubert break;
72075796c8dcSSimon Schubert
72085796c8dcSSimon Schubert /* Have to succeed matching what follows at least n times.
72095796c8dcSSimon Schubert After that, handle like `on_failure_jump'. */
72105796c8dcSSimon Schubert case succeed_n:
72115796c8dcSSimon Schubert EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
72125796c8dcSSimon Schubert DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
72135796c8dcSSimon Schubert
72145796c8dcSSimon Schubert assert (mcnt >= 0);
72155796c8dcSSimon Schubert /* Originally, this is how many times we HAVE to succeed. */
72165796c8dcSSimon Schubert if (mcnt > 0)
72175796c8dcSSimon Schubert {
72185796c8dcSSimon Schubert mcnt--;
72195796c8dcSSimon Schubert p += OFFSET_ADDRESS_SIZE;
72205796c8dcSSimon Schubert STORE_NUMBER_AND_INCR (p, mcnt);
72215796c8dcSSimon Schubert #ifdef _LIBC
72225796c8dcSSimon Schubert DEBUG_PRINT3 (" Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
72235796c8dcSSimon Schubert , mcnt);
72245796c8dcSSimon Schubert #else
72255796c8dcSSimon Schubert DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
72265796c8dcSSimon Schubert , mcnt);
72275796c8dcSSimon Schubert #endif
72285796c8dcSSimon Schubert }
72295796c8dcSSimon Schubert else if (mcnt == 0)
72305796c8dcSSimon Schubert {
72315796c8dcSSimon Schubert #ifdef _LIBC
72325796c8dcSSimon Schubert DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n",
72335796c8dcSSimon Schubert p + OFFSET_ADDRESS_SIZE);
72345796c8dcSSimon Schubert #else
72355796c8dcSSimon Schubert DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n",
72365796c8dcSSimon Schubert p + OFFSET_ADDRESS_SIZE);
72375796c8dcSSimon Schubert #endif /* _LIBC */
72385796c8dcSSimon Schubert
72395796c8dcSSimon Schubert #ifdef WCHAR
72405796c8dcSSimon Schubert p[1] = (UCHAR_T) no_op;
72415796c8dcSSimon Schubert #else
72425796c8dcSSimon Schubert p[2] = (UCHAR_T) no_op;
72435796c8dcSSimon Schubert p[3] = (UCHAR_T) no_op;
72445796c8dcSSimon Schubert #endif /* WCHAR */
72455796c8dcSSimon Schubert goto on_failure;
72465796c8dcSSimon Schubert }
72475796c8dcSSimon Schubert break;
72485796c8dcSSimon Schubert
72495796c8dcSSimon Schubert case jump_n:
72505796c8dcSSimon Schubert EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
72515796c8dcSSimon Schubert DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
72525796c8dcSSimon Schubert
72535796c8dcSSimon Schubert /* Originally, this is how many times we CAN jump. */
72545796c8dcSSimon Schubert if (mcnt)
72555796c8dcSSimon Schubert {
72565796c8dcSSimon Schubert mcnt--;
72575796c8dcSSimon Schubert STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
72585796c8dcSSimon Schubert
72595796c8dcSSimon Schubert #ifdef _LIBC
72605796c8dcSSimon Schubert DEBUG_PRINT3 (" Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
72615796c8dcSSimon Schubert mcnt);
72625796c8dcSSimon Schubert #else
72635796c8dcSSimon Schubert DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
72645796c8dcSSimon Schubert mcnt);
72655796c8dcSSimon Schubert #endif /* _LIBC */
72665796c8dcSSimon Schubert goto unconditional_jump;
72675796c8dcSSimon Schubert }
72685796c8dcSSimon Schubert /* If don't have to jump any more, skip over the rest of command. */
72695796c8dcSSimon Schubert else
72705796c8dcSSimon Schubert p += 2 * OFFSET_ADDRESS_SIZE;
72715796c8dcSSimon Schubert break;
72725796c8dcSSimon Schubert
72735796c8dcSSimon Schubert case set_number_at:
72745796c8dcSSimon Schubert {
72755796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
72765796c8dcSSimon Schubert
72775796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (mcnt, p);
72785796c8dcSSimon Schubert p1 = p + mcnt;
72795796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (mcnt, p);
72805796c8dcSSimon Schubert #ifdef _LIBC
72815796c8dcSSimon Schubert DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
72825796c8dcSSimon Schubert #else
72835796c8dcSSimon Schubert DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
72845796c8dcSSimon Schubert #endif
72855796c8dcSSimon Schubert STORE_NUMBER (p1, mcnt);
72865796c8dcSSimon Schubert break;
72875796c8dcSSimon Schubert }
72885796c8dcSSimon Schubert
72895796c8dcSSimon Schubert #if 0
72905796c8dcSSimon Schubert /* The DEC Alpha C compiler 3.x generates incorrect code for the
72915796c8dcSSimon Schubert test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
72925796c8dcSSimon Schubert AT_WORD_BOUNDARY, so this code is disabled. Expanding the
72935796c8dcSSimon Schubert macro and introducing temporary variables works around the bug. */
72945796c8dcSSimon Schubert
72955796c8dcSSimon Schubert case wordbound:
72965796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING wordbound.\n");
72975796c8dcSSimon Schubert if (AT_WORD_BOUNDARY (d))
72985796c8dcSSimon Schubert break;
72995796c8dcSSimon Schubert goto fail;
73005796c8dcSSimon Schubert
73015796c8dcSSimon Schubert case notwordbound:
73025796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
73035796c8dcSSimon Schubert if (AT_WORD_BOUNDARY (d))
73045796c8dcSSimon Schubert goto fail;
73055796c8dcSSimon Schubert break;
73065796c8dcSSimon Schubert #else
73075796c8dcSSimon Schubert case wordbound:
73085796c8dcSSimon Schubert {
73095796c8dcSSimon Schubert boolean prevchar, thischar;
73105796c8dcSSimon Schubert
73115796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING wordbound.\n");
73125796c8dcSSimon Schubert if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
73135796c8dcSSimon Schubert break;
73145796c8dcSSimon Schubert
73155796c8dcSSimon Schubert prevchar = WORDCHAR_P (d - 1);
73165796c8dcSSimon Schubert thischar = WORDCHAR_P (d);
73175796c8dcSSimon Schubert if (prevchar != thischar)
73185796c8dcSSimon Schubert break;
73195796c8dcSSimon Schubert goto fail;
73205796c8dcSSimon Schubert }
73215796c8dcSSimon Schubert
73225796c8dcSSimon Schubert case notwordbound:
73235796c8dcSSimon Schubert {
73245796c8dcSSimon Schubert boolean prevchar, thischar;
73255796c8dcSSimon Schubert
73265796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
73275796c8dcSSimon Schubert if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
73285796c8dcSSimon Schubert goto fail;
73295796c8dcSSimon Schubert
73305796c8dcSSimon Schubert prevchar = WORDCHAR_P (d - 1);
73315796c8dcSSimon Schubert thischar = WORDCHAR_P (d);
73325796c8dcSSimon Schubert if (prevchar != thischar)
73335796c8dcSSimon Schubert goto fail;
73345796c8dcSSimon Schubert break;
73355796c8dcSSimon Schubert }
73365796c8dcSSimon Schubert #endif
73375796c8dcSSimon Schubert
73385796c8dcSSimon Schubert case wordbeg:
73395796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
73405796c8dcSSimon Schubert if (!AT_STRINGS_END (d) && WORDCHAR_P (d)
73415796c8dcSSimon Schubert && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
73425796c8dcSSimon Schubert break;
73435796c8dcSSimon Schubert goto fail;
73445796c8dcSSimon Schubert
73455796c8dcSSimon Schubert case wordend:
73465796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING wordend.\n");
73475796c8dcSSimon Schubert if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
73485796c8dcSSimon Schubert && (AT_STRINGS_END (d) || !WORDCHAR_P (d)))
73495796c8dcSSimon Schubert break;
73505796c8dcSSimon Schubert goto fail;
73515796c8dcSSimon Schubert
73525796c8dcSSimon Schubert #ifdef emacs
73535796c8dcSSimon Schubert case before_dot:
73545796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING before_dot.\n");
73555796c8dcSSimon Schubert if (PTR_CHAR_POS ((unsigned char *) d) >= point)
73565796c8dcSSimon Schubert goto fail;
73575796c8dcSSimon Schubert break;
73585796c8dcSSimon Schubert
73595796c8dcSSimon Schubert case at_dot:
73605796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING at_dot.\n");
73615796c8dcSSimon Schubert if (PTR_CHAR_POS ((unsigned char *) d) != point)
73625796c8dcSSimon Schubert goto fail;
73635796c8dcSSimon Schubert break;
73645796c8dcSSimon Schubert
73655796c8dcSSimon Schubert case after_dot:
73665796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING after_dot.\n");
73675796c8dcSSimon Schubert if (PTR_CHAR_POS ((unsigned char *) d) <= point)
73685796c8dcSSimon Schubert goto fail;
73695796c8dcSSimon Schubert break;
73705796c8dcSSimon Schubert
73715796c8dcSSimon Schubert case syntaxspec:
73725796c8dcSSimon Schubert DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
73735796c8dcSSimon Schubert mcnt = *p++;
73745796c8dcSSimon Schubert goto matchsyntax;
73755796c8dcSSimon Schubert
73765796c8dcSSimon Schubert case wordchar:
73775796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
73785796c8dcSSimon Schubert mcnt = (int) Sword;
73795796c8dcSSimon Schubert matchsyntax:
73805796c8dcSSimon Schubert PREFETCH ();
73815796c8dcSSimon Schubert /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
73825796c8dcSSimon Schubert d++;
73835796c8dcSSimon Schubert if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
73845796c8dcSSimon Schubert goto fail;
73855796c8dcSSimon Schubert SET_REGS_MATCHED ();
73865796c8dcSSimon Schubert break;
73875796c8dcSSimon Schubert
73885796c8dcSSimon Schubert case notsyntaxspec:
73895796c8dcSSimon Schubert DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
73905796c8dcSSimon Schubert mcnt = *p++;
73915796c8dcSSimon Schubert goto matchnotsyntax;
73925796c8dcSSimon Schubert
73935796c8dcSSimon Schubert case notwordchar:
73945796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
73955796c8dcSSimon Schubert mcnt = (int) Sword;
73965796c8dcSSimon Schubert matchnotsyntax:
73975796c8dcSSimon Schubert PREFETCH ();
73985796c8dcSSimon Schubert /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
73995796c8dcSSimon Schubert d++;
74005796c8dcSSimon Schubert if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
74015796c8dcSSimon Schubert goto fail;
74025796c8dcSSimon Schubert SET_REGS_MATCHED ();
74035796c8dcSSimon Schubert break;
74045796c8dcSSimon Schubert
74055796c8dcSSimon Schubert #else /* not emacs */
74065796c8dcSSimon Schubert case wordchar:
74075796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
74085796c8dcSSimon Schubert PREFETCH ();
74095796c8dcSSimon Schubert if (!WORDCHAR_P (d))
74105796c8dcSSimon Schubert goto fail;
74115796c8dcSSimon Schubert SET_REGS_MATCHED ();
74125796c8dcSSimon Schubert d++;
74135796c8dcSSimon Schubert break;
74145796c8dcSSimon Schubert
74155796c8dcSSimon Schubert case notwordchar:
74165796c8dcSSimon Schubert DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
74175796c8dcSSimon Schubert PREFETCH ();
74185796c8dcSSimon Schubert if (WORDCHAR_P (d))
74195796c8dcSSimon Schubert goto fail;
74205796c8dcSSimon Schubert SET_REGS_MATCHED ();
74215796c8dcSSimon Schubert d++;
74225796c8dcSSimon Schubert break;
74235796c8dcSSimon Schubert #endif /* not emacs */
74245796c8dcSSimon Schubert
74255796c8dcSSimon Schubert default:
74265796c8dcSSimon Schubert abort ();
74275796c8dcSSimon Schubert }
74285796c8dcSSimon Schubert continue; /* Successfully executed one pattern command; keep going. */
74295796c8dcSSimon Schubert
74305796c8dcSSimon Schubert
74315796c8dcSSimon Schubert /* We goto here if a matching operation fails. */
74325796c8dcSSimon Schubert fail:
74335796c8dcSSimon Schubert if (!FAIL_STACK_EMPTY ())
74345796c8dcSSimon Schubert { /* A restart point is known. Restore to that state. */
74355796c8dcSSimon Schubert DEBUG_PRINT1 ("\nFAIL:\n");
74365796c8dcSSimon Schubert POP_FAILURE_POINT (d, p,
74375796c8dcSSimon Schubert lowest_active_reg, highest_active_reg,
74385796c8dcSSimon Schubert regstart, regend, reg_info);
74395796c8dcSSimon Schubert
74405796c8dcSSimon Schubert /* If this failure point is a dummy, try the next one. */
74415796c8dcSSimon Schubert if (!p)
74425796c8dcSSimon Schubert goto fail;
74435796c8dcSSimon Schubert
74445796c8dcSSimon Schubert /* If we failed to the end of the pattern, don't examine *p. */
74455796c8dcSSimon Schubert assert (p <= pend);
74465796c8dcSSimon Schubert if (p < pend)
74475796c8dcSSimon Schubert {
74485796c8dcSSimon Schubert boolean is_a_jump_n = false;
74495796c8dcSSimon Schubert
74505796c8dcSSimon Schubert /* If failed to a backwards jump that's part of a repetition
74515796c8dcSSimon Schubert loop, need to pop this failure point and use the next one. */
74525796c8dcSSimon Schubert switch ((re_opcode_t) *p)
74535796c8dcSSimon Schubert {
74545796c8dcSSimon Schubert case jump_n:
74555796c8dcSSimon Schubert is_a_jump_n = true;
74565796c8dcSSimon Schubert case maybe_pop_jump:
74575796c8dcSSimon Schubert case pop_failure_jump:
74585796c8dcSSimon Schubert case jump:
74595796c8dcSSimon Schubert p1 = p + 1;
74605796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (mcnt, p1);
74615796c8dcSSimon Schubert p1 += mcnt;
74625796c8dcSSimon Schubert
74635796c8dcSSimon Schubert if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
74645796c8dcSSimon Schubert || (!is_a_jump_n
74655796c8dcSSimon Schubert && (re_opcode_t) *p1 == on_failure_jump))
74665796c8dcSSimon Schubert goto fail;
74675796c8dcSSimon Schubert break;
74685796c8dcSSimon Schubert default:
74695796c8dcSSimon Schubert /* do nothing */ ;
74705796c8dcSSimon Schubert }
74715796c8dcSSimon Schubert }
74725796c8dcSSimon Schubert
74735796c8dcSSimon Schubert if (d >= string1 && d <= end1)
74745796c8dcSSimon Schubert dend = end_match_1;
74755796c8dcSSimon Schubert }
74765796c8dcSSimon Schubert else
74775796c8dcSSimon Schubert break; /* Matching at this starting point really fails. */
74785796c8dcSSimon Schubert } /* for (;;) */
74795796c8dcSSimon Schubert
74805796c8dcSSimon Schubert if (best_regs_set)
74815796c8dcSSimon Schubert goto restore_best_regs;
74825796c8dcSSimon Schubert
74835796c8dcSSimon Schubert FREE_VARIABLES ();
74845796c8dcSSimon Schubert
74855796c8dcSSimon Schubert return -1; /* Failure to match. */
74865796c8dcSSimon Schubert } /* re_match_2 */
74875796c8dcSSimon Schubert
74885796c8dcSSimon Schubert /* Subroutine definitions for re_match_2. */
74895796c8dcSSimon Schubert
74905796c8dcSSimon Schubert
74915796c8dcSSimon Schubert /* We are passed P pointing to a register number after a start_memory.
74925796c8dcSSimon Schubert
74935796c8dcSSimon Schubert Return true if the pattern up to the corresponding stop_memory can
74945796c8dcSSimon Schubert match the empty string, and false otherwise.
74955796c8dcSSimon Schubert
74965796c8dcSSimon Schubert If we find the matching stop_memory, sets P to point to one past its number.
74975796c8dcSSimon Schubert Otherwise, sets P to an undefined byte less than or equal to END.
74985796c8dcSSimon Schubert
74995796c8dcSSimon Schubert We don't handle duplicates properly (yet). */
75005796c8dcSSimon Schubert
75015796c8dcSSimon Schubert static boolean
PREFIX(group_match_null_string_p)75025796c8dcSSimon Schubert PREFIX(group_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
75035796c8dcSSimon Schubert PREFIX(register_info_type) *reg_info)
75045796c8dcSSimon Schubert {
75055796c8dcSSimon Schubert int mcnt;
75065796c8dcSSimon Schubert /* Point to after the args to the start_memory. */
75075796c8dcSSimon Schubert UCHAR_T *p1 = *p + 2;
75085796c8dcSSimon Schubert
75095796c8dcSSimon Schubert while (p1 < end)
75105796c8dcSSimon Schubert {
75115796c8dcSSimon Schubert /* Skip over opcodes that can match nothing, and return true or
75125796c8dcSSimon Schubert false, as appropriate, when we get to one that can't, or to the
75135796c8dcSSimon Schubert matching stop_memory. */
75145796c8dcSSimon Schubert
75155796c8dcSSimon Schubert switch ((re_opcode_t) *p1)
75165796c8dcSSimon Schubert {
75175796c8dcSSimon Schubert /* Could be either a loop or a series of alternatives. */
75185796c8dcSSimon Schubert case on_failure_jump:
75195796c8dcSSimon Schubert p1++;
75205796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (mcnt, p1);
75215796c8dcSSimon Schubert
75225796c8dcSSimon Schubert /* If the next operation is not a jump backwards in the
75235796c8dcSSimon Schubert pattern. */
75245796c8dcSSimon Schubert
75255796c8dcSSimon Schubert if (mcnt >= 0)
75265796c8dcSSimon Schubert {
75275796c8dcSSimon Schubert /* Go through the on_failure_jumps of the alternatives,
75285796c8dcSSimon Schubert seeing if any of the alternatives cannot match nothing.
75295796c8dcSSimon Schubert The last alternative starts with only a jump,
75305796c8dcSSimon Schubert whereas the rest start with on_failure_jump and end
75315796c8dcSSimon Schubert with a jump, e.g., here is the pattern for `a|b|c':
75325796c8dcSSimon Schubert
75335796c8dcSSimon Schubert /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
75345796c8dcSSimon Schubert /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
75355796c8dcSSimon Schubert /exactn/1/c
75365796c8dcSSimon Schubert
75375796c8dcSSimon Schubert So, we have to first go through the first (n-1)
75385796c8dcSSimon Schubert alternatives and then deal with the last one separately. */
75395796c8dcSSimon Schubert
75405796c8dcSSimon Schubert
75415796c8dcSSimon Schubert /* Deal with the first (n-1) alternatives, which start
75425796c8dcSSimon Schubert with an on_failure_jump (see above) that jumps to right
75435796c8dcSSimon Schubert past a jump_past_alt. */
75445796c8dcSSimon Schubert
75455796c8dcSSimon Schubert while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
75465796c8dcSSimon Schubert jump_past_alt)
75475796c8dcSSimon Schubert {
75485796c8dcSSimon Schubert /* `mcnt' holds how many bytes long the alternative
75495796c8dcSSimon Schubert is, including the ending `jump_past_alt' and
75505796c8dcSSimon Schubert its number. */
75515796c8dcSSimon Schubert
75525796c8dcSSimon Schubert if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
75535796c8dcSSimon Schubert (1 + OFFSET_ADDRESS_SIZE),
75545796c8dcSSimon Schubert reg_info))
75555796c8dcSSimon Schubert return false;
75565796c8dcSSimon Schubert
75575796c8dcSSimon Schubert /* Move to right after this alternative, including the
75585796c8dcSSimon Schubert jump_past_alt. */
75595796c8dcSSimon Schubert p1 += mcnt;
75605796c8dcSSimon Schubert
75615796c8dcSSimon Schubert /* Break if it's the beginning of an n-th alternative
75625796c8dcSSimon Schubert that doesn't begin with an on_failure_jump. */
75635796c8dcSSimon Schubert if ((re_opcode_t) *p1 != on_failure_jump)
75645796c8dcSSimon Schubert break;
75655796c8dcSSimon Schubert
75665796c8dcSSimon Schubert /* Still have to check that it's not an n-th
75675796c8dcSSimon Schubert alternative that starts with an on_failure_jump. */
75685796c8dcSSimon Schubert p1++;
75695796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (mcnt, p1);
75705796c8dcSSimon Schubert if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
75715796c8dcSSimon Schubert jump_past_alt)
75725796c8dcSSimon Schubert {
75735796c8dcSSimon Schubert /* Get to the beginning of the n-th alternative. */
75745796c8dcSSimon Schubert p1 -= 1 + OFFSET_ADDRESS_SIZE;
75755796c8dcSSimon Schubert break;
75765796c8dcSSimon Schubert }
75775796c8dcSSimon Schubert }
75785796c8dcSSimon Schubert
75795796c8dcSSimon Schubert /* Deal with the last alternative: go back and get number
75805796c8dcSSimon Schubert of the `jump_past_alt' just before it. `mcnt' contains
75815796c8dcSSimon Schubert the length of the alternative. */
75825796c8dcSSimon Schubert EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
75835796c8dcSSimon Schubert
75845796c8dcSSimon Schubert if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
75855796c8dcSSimon Schubert return false;
75865796c8dcSSimon Schubert
75875796c8dcSSimon Schubert p1 += mcnt; /* Get past the n-th alternative. */
75885796c8dcSSimon Schubert } /* if mcnt > 0 */
75895796c8dcSSimon Schubert break;
75905796c8dcSSimon Schubert
75915796c8dcSSimon Schubert
75925796c8dcSSimon Schubert case stop_memory:
75935796c8dcSSimon Schubert assert (p1[1] == **p);
75945796c8dcSSimon Schubert *p = p1 + 2;
75955796c8dcSSimon Schubert return true;
75965796c8dcSSimon Schubert
75975796c8dcSSimon Schubert
75985796c8dcSSimon Schubert default:
75995796c8dcSSimon Schubert if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
76005796c8dcSSimon Schubert return false;
76015796c8dcSSimon Schubert }
76025796c8dcSSimon Schubert } /* while p1 < end */
76035796c8dcSSimon Schubert
76045796c8dcSSimon Schubert return false;
76055796c8dcSSimon Schubert } /* group_match_null_string_p */
76065796c8dcSSimon Schubert
76075796c8dcSSimon Schubert
76085796c8dcSSimon Schubert /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
76095796c8dcSSimon Schubert It expects P to be the first byte of a single alternative and END one
76105796c8dcSSimon Schubert byte past the last. The alternative can contain groups. */
76115796c8dcSSimon Schubert
76125796c8dcSSimon Schubert static boolean
PREFIX(alt_match_null_string_p)76135796c8dcSSimon Schubert PREFIX(alt_match_null_string_p) (UCHAR_T *p, UCHAR_T *end,
76145796c8dcSSimon Schubert PREFIX(register_info_type) *reg_info)
76155796c8dcSSimon Schubert {
76165796c8dcSSimon Schubert int mcnt;
76175796c8dcSSimon Schubert UCHAR_T *p1 = p;
76185796c8dcSSimon Schubert
76195796c8dcSSimon Schubert while (p1 < end)
76205796c8dcSSimon Schubert {
76215796c8dcSSimon Schubert /* Skip over opcodes that can match nothing, and break when we get
76225796c8dcSSimon Schubert to one that can't. */
76235796c8dcSSimon Schubert
76245796c8dcSSimon Schubert switch ((re_opcode_t) *p1)
76255796c8dcSSimon Schubert {
76265796c8dcSSimon Schubert /* It's a loop. */
76275796c8dcSSimon Schubert case on_failure_jump:
76285796c8dcSSimon Schubert p1++;
76295796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (mcnt, p1);
76305796c8dcSSimon Schubert p1 += mcnt;
76315796c8dcSSimon Schubert break;
76325796c8dcSSimon Schubert
76335796c8dcSSimon Schubert default:
76345796c8dcSSimon Schubert if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
76355796c8dcSSimon Schubert return false;
76365796c8dcSSimon Schubert }
76375796c8dcSSimon Schubert } /* while p1 < end */
76385796c8dcSSimon Schubert
76395796c8dcSSimon Schubert return true;
76405796c8dcSSimon Schubert } /* alt_match_null_string_p */
76415796c8dcSSimon Schubert
76425796c8dcSSimon Schubert
76435796c8dcSSimon Schubert /* Deals with the ops common to group_match_null_string_p and
76445796c8dcSSimon Schubert alt_match_null_string_p.
76455796c8dcSSimon Schubert
76465796c8dcSSimon Schubert Sets P to one after the op and its arguments, if any. */
76475796c8dcSSimon Schubert
76485796c8dcSSimon Schubert static boolean
PREFIX(common_op_match_null_string_p)76495796c8dcSSimon Schubert PREFIX(common_op_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
76505796c8dcSSimon Schubert PREFIX(register_info_type) *reg_info)
76515796c8dcSSimon Schubert {
76525796c8dcSSimon Schubert int mcnt;
76535796c8dcSSimon Schubert boolean ret;
76545796c8dcSSimon Schubert int reg_no;
76555796c8dcSSimon Schubert UCHAR_T *p1 = *p;
76565796c8dcSSimon Schubert
76575796c8dcSSimon Schubert switch ((re_opcode_t) *p1++)
76585796c8dcSSimon Schubert {
76595796c8dcSSimon Schubert case no_op:
76605796c8dcSSimon Schubert case begline:
76615796c8dcSSimon Schubert case endline:
76625796c8dcSSimon Schubert case begbuf:
76635796c8dcSSimon Schubert case endbuf:
76645796c8dcSSimon Schubert case wordbeg:
76655796c8dcSSimon Schubert case wordend:
76665796c8dcSSimon Schubert case wordbound:
76675796c8dcSSimon Schubert case notwordbound:
76685796c8dcSSimon Schubert #ifdef emacs
76695796c8dcSSimon Schubert case before_dot:
76705796c8dcSSimon Schubert case at_dot:
76715796c8dcSSimon Schubert case after_dot:
76725796c8dcSSimon Schubert #endif
76735796c8dcSSimon Schubert break;
76745796c8dcSSimon Schubert
76755796c8dcSSimon Schubert case start_memory:
76765796c8dcSSimon Schubert reg_no = *p1;
76775796c8dcSSimon Schubert assert (reg_no > 0 && reg_no <= MAX_REGNUM);
76785796c8dcSSimon Schubert ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);
76795796c8dcSSimon Schubert
76805796c8dcSSimon Schubert /* Have to set this here in case we're checking a group which
76815796c8dcSSimon Schubert contains a group and a back reference to it. */
76825796c8dcSSimon Schubert
76835796c8dcSSimon Schubert if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
76845796c8dcSSimon Schubert REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
76855796c8dcSSimon Schubert
76865796c8dcSSimon Schubert if (!ret)
76875796c8dcSSimon Schubert return false;
76885796c8dcSSimon Schubert break;
76895796c8dcSSimon Schubert
76905796c8dcSSimon Schubert /* If this is an optimized succeed_n for zero times, make the jump. */
76915796c8dcSSimon Schubert case jump:
76925796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (mcnt, p1);
76935796c8dcSSimon Schubert if (mcnt >= 0)
76945796c8dcSSimon Schubert p1 += mcnt;
76955796c8dcSSimon Schubert else
76965796c8dcSSimon Schubert return false;
76975796c8dcSSimon Schubert break;
76985796c8dcSSimon Schubert
76995796c8dcSSimon Schubert case succeed_n:
77005796c8dcSSimon Schubert /* Get to the number of times to succeed. */
77015796c8dcSSimon Schubert p1 += OFFSET_ADDRESS_SIZE;
77025796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (mcnt, p1);
77035796c8dcSSimon Schubert
77045796c8dcSSimon Schubert if (mcnt == 0)
77055796c8dcSSimon Schubert {
77065796c8dcSSimon Schubert p1 -= 2 * OFFSET_ADDRESS_SIZE;
77075796c8dcSSimon Schubert EXTRACT_NUMBER_AND_INCR (mcnt, p1);
77085796c8dcSSimon Schubert p1 += mcnt;
77095796c8dcSSimon Schubert }
77105796c8dcSSimon Schubert else
77115796c8dcSSimon Schubert return false;
77125796c8dcSSimon Schubert break;
77135796c8dcSSimon Schubert
77145796c8dcSSimon Schubert case duplicate:
77155796c8dcSSimon Schubert if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
77165796c8dcSSimon Schubert return false;
77175796c8dcSSimon Schubert break;
77185796c8dcSSimon Schubert
77195796c8dcSSimon Schubert case set_number_at:
77205796c8dcSSimon Schubert p1 += 2 * OFFSET_ADDRESS_SIZE;
77215796c8dcSSimon Schubert
77225796c8dcSSimon Schubert default:
77235796c8dcSSimon Schubert /* All other opcodes mean we cannot match the empty string. */
77245796c8dcSSimon Schubert return false;
77255796c8dcSSimon Schubert }
77265796c8dcSSimon Schubert
77275796c8dcSSimon Schubert *p = p1;
77285796c8dcSSimon Schubert return true;
77295796c8dcSSimon Schubert } /* common_op_match_null_string_p */
77305796c8dcSSimon Schubert
77315796c8dcSSimon Schubert
77325796c8dcSSimon Schubert /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
77335796c8dcSSimon Schubert bytes; nonzero otherwise. */
77345796c8dcSSimon Schubert
77355796c8dcSSimon Schubert static int
PREFIX(bcmp_translate)77365796c8dcSSimon Schubert PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2, register int len,
77375796c8dcSSimon Schubert RE_TRANSLATE_TYPE translate)
77385796c8dcSSimon Schubert {
77395796c8dcSSimon Schubert register const UCHAR_T *p1 = (const UCHAR_T *) s1;
77405796c8dcSSimon Schubert register const UCHAR_T *p2 = (const UCHAR_T *) s2;
77415796c8dcSSimon Schubert while (len)
77425796c8dcSSimon Schubert {
77435796c8dcSSimon Schubert #ifdef WCHAR
77445796c8dcSSimon Schubert if (((*p1<=0xff)?translate[*p1++]:*p1++)
77455796c8dcSSimon Schubert != ((*p2<=0xff)?translate[*p2++]:*p2++))
77465796c8dcSSimon Schubert return 1;
77475796c8dcSSimon Schubert #else /* BYTE */
77485796c8dcSSimon Schubert if (translate[*p1++] != translate[*p2++]) return 1;
77495796c8dcSSimon Schubert #endif /* WCHAR */
77505796c8dcSSimon Schubert len--;
77515796c8dcSSimon Schubert }
77525796c8dcSSimon Schubert return 0;
77535796c8dcSSimon Schubert }
77545796c8dcSSimon Schubert
77555796c8dcSSimon Schubert
77565796c8dcSSimon Schubert #else /* not INSIDE_RECURSION */
77575796c8dcSSimon Schubert
77585796c8dcSSimon Schubert /* Entry points for GNU code. */
77595796c8dcSSimon Schubert
77605796c8dcSSimon Schubert /* re_compile_pattern is the GNU regular expression compiler: it
77615796c8dcSSimon Schubert compiles PATTERN (of length SIZE) and puts the result in BUFP.
77625796c8dcSSimon Schubert Returns 0 if the pattern was valid, otherwise an error string.
77635796c8dcSSimon Schubert
77645796c8dcSSimon Schubert Assumes the `allocated' (and perhaps `buffer') and `translate' fields
77655796c8dcSSimon Schubert are set in BUFP on entry.
77665796c8dcSSimon Schubert
77675796c8dcSSimon Schubert We call regex_compile to do the actual compilation. */
77685796c8dcSSimon Schubert
77695796c8dcSSimon Schubert const char *
77705796c8dcSSimon Schubert re_compile_pattern (const char *pattern, size_t length,
77715796c8dcSSimon Schubert struct re_pattern_buffer *bufp)
77725796c8dcSSimon Schubert {
77735796c8dcSSimon Schubert reg_errcode_t ret;
77745796c8dcSSimon Schubert
77755796c8dcSSimon Schubert /* GNU code is written to assume at least RE_NREGS registers will be set
77765796c8dcSSimon Schubert (and at least one extra will be -1). */
77775796c8dcSSimon Schubert bufp->regs_allocated = REGS_UNALLOCATED;
77785796c8dcSSimon Schubert
77795796c8dcSSimon Schubert /* And GNU code determines whether or not to get register information
77805796c8dcSSimon Schubert by passing null for the REGS argument to re_match, etc., not by
77815796c8dcSSimon Schubert setting no_sub. */
77825796c8dcSSimon Schubert bufp->no_sub = 0;
77835796c8dcSSimon Schubert
77845796c8dcSSimon Schubert /* Match anchors at newline. */
77855796c8dcSSimon Schubert bufp->newline_anchor = 1;
77865796c8dcSSimon Schubert
77875796c8dcSSimon Schubert # ifdef MBS_SUPPORT
77885796c8dcSSimon Schubert if (MB_CUR_MAX != 1)
77895796c8dcSSimon Schubert ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp);
77905796c8dcSSimon Schubert else
77915796c8dcSSimon Schubert # endif
77925796c8dcSSimon Schubert ret = byte_regex_compile (pattern, length, re_syntax_options, bufp);
77935796c8dcSSimon Schubert
77945796c8dcSSimon Schubert if (!ret)
77955796c8dcSSimon Schubert return NULL;
77965796c8dcSSimon Schubert return gettext (re_error_msgid[(int) ret]);
77975796c8dcSSimon Schubert }
77985796c8dcSSimon Schubert #ifdef _LIBC
77995796c8dcSSimon Schubert weak_alias (__re_compile_pattern, re_compile_pattern)
78005796c8dcSSimon Schubert #endif
78015796c8dcSSimon Schubert
78025796c8dcSSimon Schubert /* Entry points compatible with 4.2 BSD regex library. We don't define
78035796c8dcSSimon Schubert them unless specifically requested. */
78045796c8dcSSimon Schubert
78055796c8dcSSimon Schubert #if defined _REGEX_RE_COMP || defined _LIBC
78065796c8dcSSimon Schubert
78075796c8dcSSimon Schubert /* BSD has one and only one pattern buffer. */
78085796c8dcSSimon Schubert static struct re_pattern_buffer re_comp_buf;
78095796c8dcSSimon Schubert
78105796c8dcSSimon Schubert char *
78115796c8dcSSimon Schubert #ifdef _LIBC
78125796c8dcSSimon Schubert /* Make these definitions weak in libc, so POSIX programs can redefine
78135796c8dcSSimon Schubert these names if they don't use our functions, and still use
78145796c8dcSSimon Schubert regcomp/regexec below without link errors. */
78155796c8dcSSimon Schubert weak_function
78165796c8dcSSimon Schubert #endif
78175796c8dcSSimon Schubert re_comp (const char *s)
78185796c8dcSSimon Schubert {
78195796c8dcSSimon Schubert reg_errcode_t ret;
78205796c8dcSSimon Schubert
78215796c8dcSSimon Schubert if (!s)
78225796c8dcSSimon Schubert {
78235796c8dcSSimon Schubert if (!re_comp_buf.buffer)
78245796c8dcSSimon Schubert return (char *) gettext ("No previous regular expression");
78255796c8dcSSimon Schubert return 0;
78265796c8dcSSimon Schubert }
78275796c8dcSSimon Schubert
78285796c8dcSSimon Schubert if (!re_comp_buf.buffer)
78295796c8dcSSimon Schubert {
78305796c8dcSSimon Schubert re_comp_buf.buffer = (unsigned char *) malloc (200);
78315796c8dcSSimon Schubert if (re_comp_buf.buffer == NULL)
78325796c8dcSSimon Schubert return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
78335796c8dcSSimon Schubert re_comp_buf.allocated = 200;
78345796c8dcSSimon Schubert
78355796c8dcSSimon Schubert re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
78365796c8dcSSimon Schubert if (re_comp_buf.fastmap == NULL)
78375796c8dcSSimon Schubert return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
78385796c8dcSSimon Schubert }
78395796c8dcSSimon Schubert
78405796c8dcSSimon Schubert /* Since `re_exec' always passes NULL for the `regs' argument, we
78415796c8dcSSimon Schubert don't need to initialize the pattern buffer fields which affect it. */
78425796c8dcSSimon Schubert
78435796c8dcSSimon Schubert /* Match anchors at newlines. */
78445796c8dcSSimon Schubert re_comp_buf.newline_anchor = 1;
78455796c8dcSSimon Schubert
78465796c8dcSSimon Schubert # ifdef MBS_SUPPORT
78475796c8dcSSimon Schubert if (MB_CUR_MAX != 1)
78485796c8dcSSimon Schubert ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
78495796c8dcSSimon Schubert else
78505796c8dcSSimon Schubert # endif
78515796c8dcSSimon Schubert ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
78525796c8dcSSimon Schubert
78535796c8dcSSimon Schubert if (!ret)
78545796c8dcSSimon Schubert return NULL;
78555796c8dcSSimon Schubert
78565796c8dcSSimon Schubert /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
78575796c8dcSSimon Schubert return (char *) gettext (re_error_msgid[(int) ret]);
78585796c8dcSSimon Schubert }
78595796c8dcSSimon Schubert
78605796c8dcSSimon Schubert
78615796c8dcSSimon Schubert int
78625796c8dcSSimon Schubert #ifdef _LIBC
78635796c8dcSSimon Schubert weak_function
78645796c8dcSSimon Schubert #endif
78655796c8dcSSimon Schubert re_exec (const char *s)
78665796c8dcSSimon Schubert {
78675796c8dcSSimon Schubert const int len = strlen (s);
78685796c8dcSSimon Schubert return
78695796c8dcSSimon Schubert 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
78705796c8dcSSimon Schubert }
78715796c8dcSSimon Schubert
78725796c8dcSSimon Schubert #endif /* _REGEX_RE_COMP */
78735796c8dcSSimon Schubert
78745796c8dcSSimon Schubert /* POSIX.2 functions. Don't define these for Emacs. */
78755796c8dcSSimon Schubert
78765796c8dcSSimon Schubert #ifndef emacs
78775796c8dcSSimon Schubert
78785796c8dcSSimon Schubert /* regcomp takes a regular expression as a string and compiles it.
78795796c8dcSSimon Schubert
78805796c8dcSSimon Schubert PREG is a regex_t *. We do not expect any fields to be initialized,
78815796c8dcSSimon Schubert since POSIX says we shouldn't. Thus, we set
78825796c8dcSSimon Schubert
78835796c8dcSSimon Schubert `buffer' to the compiled pattern;
78845796c8dcSSimon Schubert `used' to the length of the compiled pattern;
78855796c8dcSSimon Schubert `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
78865796c8dcSSimon Schubert REG_EXTENDED bit in CFLAGS is set; otherwise, to
78875796c8dcSSimon Schubert RE_SYNTAX_POSIX_BASIC;
78885796c8dcSSimon Schubert `newline_anchor' to REG_NEWLINE being set in CFLAGS;
78895796c8dcSSimon Schubert `fastmap' to an allocated space for the fastmap;
78905796c8dcSSimon Schubert `fastmap_accurate' to zero;
78915796c8dcSSimon Schubert `re_nsub' to the number of subexpressions in PATTERN.
78925796c8dcSSimon Schubert
78935796c8dcSSimon Schubert PATTERN is the address of the pattern string.
78945796c8dcSSimon Schubert
78955796c8dcSSimon Schubert CFLAGS is a series of bits which affect compilation.
78965796c8dcSSimon Schubert
78975796c8dcSSimon Schubert If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
78985796c8dcSSimon Schubert use POSIX basic syntax.
78995796c8dcSSimon Schubert
79005796c8dcSSimon Schubert If REG_NEWLINE is set, then . and [^...] don't match newline.
79015796c8dcSSimon Schubert Also, regexec will try a match beginning after every newline.
79025796c8dcSSimon Schubert
79035796c8dcSSimon Schubert If REG_ICASE is set, then we considers upper- and lowercase
79045796c8dcSSimon Schubert versions of letters to be equivalent when matching.
79055796c8dcSSimon Schubert
79065796c8dcSSimon Schubert If REG_NOSUB is set, then when PREG is passed to regexec, that
79075796c8dcSSimon Schubert routine will report only success or failure, and nothing about the
79085796c8dcSSimon Schubert registers.
79095796c8dcSSimon Schubert
79105796c8dcSSimon Schubert It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
79115796c8dcSSimon Schubert the return codes and their meanings.) */
79125796c8dcSSimon Schubert
79135796c8dcSSimon Schubert int
79145796c8dcSSimon Schubert regcomp (regex_t *preg, const char *pattern, int cflags)
79155796c8dcSSimon Schubert {
79165796c8dcSSimon Schubert reg_errcode_t ret;
79175796c8dcSSimon Schubert reg_syntax_t syntax
79185796c8dcSSimon Schubert = (cflags & REG_EXTENDED) ?
79195796c8dcSSimon Schubert RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
79205796c8dcSSimon Schubert
79215796c8dcSSimon Schubert /* regex_compile will allocate the space for the compiled pattern. */
79225796c8dcSSimon Schubert preg->buffer = 0;
79235796c8dcSSimon Schubert preg->allocated = 0;
79245796c8dcSSimon Schubert preg->used = 0;
79255796c8dcSSimon Schubert
79265796c8dcSSimon Schubert /* Try to allocate space for the fastmap. */
79275796c8dcSSimon Schubert preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
79285796c8dcSSimon Schubert
79295796c8dcSSimon Schubert if (cflags & REG_ICASE)
79305796c8dcSSimon Schubert {
79315796c8dcSSimon Schubert int i;
79325796c8dcSSimon Schubert
79335796c8dcSSimon Schubert preg->translate
79345796c8dcSSimon Schubert = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
79355796c8dcSSimon Schubert * sizeof (*(RE_TRANSLATE_TYPE)0));
79365796c8dcSSimon Schubert if (preg->translate == NULL)
79375796c8dcSSimon Schubert return (int) REG_ESPACE;
79385796c8dcSSimon Schubert
79395796c8dcSSimon Schubert /* Map uppercase characters to corresponding lowercase ones. */
79405796c8dcSSimon Schubert for (i = 0; i < CHAR_SET_SIZE; i++)
79415796c8dcSSimon Schubert preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
79425796c8dcSSimon Schubert }
79435796c8dcSSimon Schubert else
79445796c8dcSSimon Schubert preg->translate = NULL;
79455796c8dcSSimon Schubert
79465796c8dcSSimon Schubert /* If REG_NEWLINE is set, newlines are treated differently. */
79475796c8dcSSimon Schubert if (cflags & REG_NEWLINE)
79485796c8dcSSimon Schubert { /* REG_NEWLINE implies neither . nor [^...] match newline. */
79495796c8dcSSimon Schubert syntax &= ~RE_DOT_NEWLINE;
79505796c8dcSSimon Schubert syntax |= RE_HAT_LISTS_NOT_NEWLINE;
79515796c8dcSSimon Schubert /* It also changes the matching behavior. */
79525796c8dcSSimon Schubert preg->newline_anchor = 1;
79535796c8dcSSimon Schubert }
79545796c8dcSSimon Schubert else
79555796c8dcSSimon Schubert preg->newline_anchor = 0;
79565796c8dcSSimon Schubert
79575796c8dcSSimon Schubert preg->no_sub = !!(cflags & REG_NOSUB);
79585796c8dcSSimon Schubert
79595796c8dcSSimon Schubert /* POSIX says a null character in the pattern terminates it, so we
79605796c8dcSSimon Schubert can use strlen here in compiling the pattern. */
79615796c8dcSSimon Schubert # ifdef MBS_SUPPORT
79625796c8dcSSimon Schubert if (MB_CUR_MAX != 1)
79635796c8dcSSimon Schubert ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg);
79645796c8dcSSimon Schubert else
79655796c8dcSSimon Schubert # endif
79665796c8dcSSimon Schubert ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg);
79675796c8dcSSimon Schubert
79685796c8dcSSimon Schubert /* POSIX doesn't distinguish between an unmatched open-group and an
79695796c8dcSSimon Schubert unmatched close-group: both are REG_EPAREN. */
79705796c8dcSSimon Schubert if (ret == REG_ERPAREN) ret = REG_EPAREN;
79715796c8dcSSimon Schubert
79725796c8dcSSimon Schubert if (ret == REG_NOERROR && preg->fastmap)
79735796c8dcSSimon Schubert {
79745796c8dcSSimon Schubert /* Compute the fastmap now, since regexec cannot modify the pattern
79755796c8dcSSimon Schubert buffer. */
79765796c8dcSSimon Schubert if (re_compile_fastmap (preg) == -2)
79775796c8dcSSimon Schubert {
79785796c8dcSSimon Schubert /* Some error occurred while computing the fastmap, just forget
79795796c8dcSSimon Schubert about it. */
79805796c8dcSSimon Schubert free (preg->fastmap);
79815796c8dcSSimon Schubert preg->fastmap = NULL;
79825796c8dcSSimon Schubert }
79835796c8dcSSimon Schubert }
79845796c8dcSSimon Schubert
79855796c8dcSSimon Schubert return (int) ret;
79865796c8dcSSimon Schubert }
79875796c8dcSSimon Schubert #ifdef _LIBC
79885796c8dcSSimon Schubert weak_alias (__regcomp, regcomp)
79895796c8dcSSimon Schubert #endif
79905796c8dcSSimon Schubert
79915796c8dcSSimon Schubert
79925796c8dcSSimon Schubert /* regexec searches for a given pattern, specified by PREG, in the
79935796c8dcSSimon Schubert string STRING.
79945796c8dcSSimon Schubert
79955796c8dcSSimon Schubert If NMATCH is zero or REG_NOSUB was set in the cflags argument to
79965796c8dcSSimon Schubert `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
79975796c8dcSSimon Schubert least NMATCH elements, and we set them to the offsets of the
79985796c8dcSSimon Schubert corresponding matched substrings.
79995796c8dcSSimon Schubert
80005796c8dcSSimon Schubert EFLAGS specifies `execution flags' which affect matching: if
80015796c8dcSSimon Schubert REG_NOTBOL is set, then ^ does not match at the beginning of the
80025796c8dcSSimon Schubert string; if REG_NOTEOL is set, then $ does not match at the end.
80035796c8dcSSimon Schubert
80045796c8dcSSimon Schubert We return 0 if we find a match and REG_NOMATCH if not. */
80055796c8dcSSimon Schubert
80065796c8dcSSimon Schubert int
80075796c8dcSSimon Schubert regexec (const regex_t *preg, const char *string, size_t nmatch,
80085796c8dcSSimon Schubert regmatch_t pmatch[], int eflags)
80095796c8dcSSimon Schubert {
80105796c8dcSSimon Schubert int ret;
80115796c8dcSSimon Schubert struct re_registers regs;
80125796c8dcSSimon Schubert regex_t private_preg;
80135796c8dcSSimon Schubert int len = strlen (string);
80145796c8dcSSimon Schubert boolean want_reg_info = !preg->no_sub && nmatch > 0;
80155796c8dcSSimon Schubert
80165796c8dcSSimon Schubert private_preg = *preg;
80175796c8dcSSimon Schubert
80185796c8dcSSimon Schubert private_preg.not_bol = !!(eflags & REG_NOTBOL);
80195796c8dcSSimon Schubert private_preg.not_eol = !!(eflags & REG_NOTEOL);
80205796c8dcSSimon Schubert
80215796c8dcSSimon Schubert /* The user has told us exactly how many registers to return
80225796c8dcSSimon Schubert information about, via `nmatch'. We have to pass that on to the
80235796c8dcSSimon Schubert matching routines. */
80245796c8dcSSimon Schubert private_preg.regs_allocated = REGS_FIXED;
80255796c8dcSSimon Schubert
80265796c8dcSSimon Schubert if (want_reg_info)
80275796c8dcSSimon Schubert {
80285796c8dcSSimon Schubert regs.num_regs = nmatch;
80295796c8dcSSimon Schubert regs.start = TALLOC (nmatch * 2, regoff_t);
80305796c8dcSSimon Schubert if (regs.start == NULL)
80315796c8dcSSimon Schubert return (int) REG_NOMATCH;
80325796c8dcSSimon Schubert regs.end = regs.start + nmatch;
80335796c8dcSSimon Schubert }
80345796c8dcSSimon Schubert
80355796c8dcSSimon Schubert /* Perform the searching operation. */
80365796c8dcSSimon Schubert ret = re_search (&private_preg, string, len,
80375796c8dcSSimon Schubert /* start: */ 0, /* range: */ len,
80385796c8dcSSimon Schubert want_reg_info ? ®s : (struct re_registers *) 0);
80395796c8dcSSimon Schubert
80405796c8dcSSimon Schubert /* Copy the register information to the POSIX structure. */
80415796c8dcSSimon Schubert if (want_reg_info)
80425796c8dcSSimon Schubert {
80435796c8dcSSimon Schubert if (ret >= 0)
80445796c8dcSSimon Schubert {
80455796c8dcSSimon Schubert unsigned r;
80465796c8dcSSimon Schubert
80475796c8dcSSimon Schubert for (r = 0; r < nmatch; r++)
80485796c8dcSSimon Schubert {
80495796c8dcSSimon Schubert pmatch[r].rm_so = regs.start[r];
80505796c8dcSSimon Schubert pmatch[r].rm_eo = regs.end[r];
80515796c8dcSSimon Schubert }
80525796c8dcSSimon Schubert }
80535796c8dcSSimon Schubert
80545796c8dcSSimon Schubert /* If we needed the temporary register info, free the space now. */
80555796c8dcSSimon Schubert free (regs.start);
80565796c8dcSSimon Schubert }
80575796c8dcSSimon Schubert
80585796c8dcSSimon Schubert /* We want zero return to mean success, unlike `re_search'. */
80595796c8dcSSimon Schubert return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
80605796c8dcSSimon Schubert }
80615796c8dcSSimon Schubert #ifdef _LIBC
80625796c8dcSSimon Schubert weak_alias (__regexec, regexec)
80635796c8dcSSimon Schubert #endif
80645796c8dcSSimon Schubert
80655796c8dcSSimon Schubert
80665796c8dcSSimon Schubert /* Returns a message corresponding to an error code, ERRCODE, returned
80675796c8dcSSimon Schubert from either regcomp or regexec. We don't use PREG here. */
80685796c8dcSSimon Schubert
80695796c8dcSSimon Schubert size_t
80705796c8dcSSimon Schubert regerror (int errcode, const regex_t *preg ATTRIBUTE_UNUSED,
80715796c8dcSSimon Schubert char *errbuf, size_t errbuf_size)
80725796c8dcSSimon Schubert {
80735796c8dcSSimon Schubert const char *msg;
80745796c8dcSSimon Schubert size_t msg_size;
80755796c8dcSSimon Schubert
80765796c8dcSSimon Schubert if (errcode < 0
80775796c8dcSSimon Schubert || errcode >= (int) (sizeof (re_error_msgid)
80785796c8dcSSimon Schubert / sizeof (re_error_msgid[0])))
80795796c8dcSSimon Schubert /* Only error codes returned by the rest of the code should be passed
80805796c8dcSSimon Schubert to this routine. If we are given anything else, or if other regex
80815796c8dcSSimon Schubert code generates an invalid error code, then the program has a bug.
80825796c8dcSSimon Schubert Dump core so we can fix it. */
80835796c8dcSSimon Schubert abort ();
80845796c8dcSSimon Schubert
80855796c8dcSSimon Schubert msg = gettext (re_error_msgid[errcode]);
80865796c8dcSSimon Schubert
80875796c8dcSSimon Schubert msg_size = strlen (msg) + 1; /* Includes the null. */
80885796c8dcSSimon Schubert
80895796c8dcSSimon Schubert if (errbuf_size != 0)
80905796c8dcSSimon Schubert {
80915796c8dcSSimon Schubert if (msg_size > errbuf_size)
80925796c8dcSSimon Schubert {
80935796c8dcSSimon Schubert #if defined HAVE_MEMPCPY || defined _LIBC
80945796c8dcSSimon Schubert *((char *) mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
80955796c8dcSSimon Schubert #else
80965796c8dcSSimon Schubert memcpy (errbuf, msg, errbuf_size - 1);
80975796c8dcSSimon Schubert errbuf[errbuf_size - 1] = 0;
80985796c8dcSSimon Schubert #endif
80995796c8dcSSimon Schubert }
81005796c8dcSSimon Schubert else
81015796c8dcSSimon Schubert memcpy (errbuf, msg, msg_size);
81025796c8dcSSimon Schubert }
81035796c8dcSSimon Schubert
81045796c8dcSSimon Schubert return msg_size;
81055796c8dcSSimon Schubert }
81065796c8dcSSimon Schubert #ifdef _LIBC
81075796c8dcSSimon Schubert weak_alias (__regerror, regerror)
81085796c8dcSSimon Schubert #endif
81095796c8dcSSimon Schubert
81105796c8dcSSimon Schubert
81115796c8dcSSimon Schubert /* Free dynamically allocated space used by PREG. */
81125796c8dcSSimon Schubert
81135796c8dcSSimon Schubert void
81145796c8dcSSimon Schubert regfree (regex_t *preg)
81155796c8dcSSimon Schubert {
81165796c8dcSSimon Schubert free (preg->buffer);
81175796c8dcSSimon Schubert preg->buffer = NULL;
81185796c8dcSSimon Schubert
81195796c8dcSSimon Schubert preg->allocated = 0;
81205796c8dcSSimon Schubert preg->used = 0;
81215796c8dcSSimon Schubert
81225796c8dcSSimon Schubert free (preg->fastmap);
81235796c8dcSSimon Schubert preg->fastmap = NULL;
81245796c8dcSSimon Schubert preg->fastmap_accurate = 0;
81255796c8dcSSimon Schubert
81265796c8dcSSimon Schubert free (preg->translate);
81275796c8dcSSimon Schubert preg->translate = NULL;
81285796c8dcSSimon Schubert }
81295796c8dcSSimon Schubert #ifdef _LIBC
81305796c8dcSSimon Schubert weak_alias (__regfree, regfree)
81315796c8dcSSimon Schubert #endif
81325796c8dcSSimon Schubert
81335796c8dcSSimon Schubert #endif /* not emacs */
81345796c8dcSSimon Schubert
81355796c8dcSSimon Schubert #endif /* not INSIDE_RECURSION */
81365796c8dcSSimon Schubert
81375796c8dcSSimon Schubert
81385796c8dcSSimon Schubert #undef STORE_NUMBER
81395796c8dcSSimon Schubert #undef STORE_NUMBER_AND_INCR
81405796c8dcSSimon Schubert #undef EXTRACT_NUMBER
81415796c8dcSSimon Schubert #undef EXTRACT_NUMBER_AND_INCR
81425796c8dcSSimon Schubert
81435796c8dcSSimon Schubert #undef DEBUG_PRINT_COMPILED_PATTERN
81445796c8dcSSimon Schubert #undef DEBUG_PRINT_DOUBLE_STRING
81455796c8dcSSimon Schubert
81465796c8dcSSimon Schubert #undef INIT_FAIL_STACK
81475796c8dcSSimon Schubert #undef RESET_FAIL_STACK
81485796c8dcSSimon Schubert #undef DOUBLE_FAIL_STACK
81495796c8dcSSimon Schubert #undef PUSH_PATTERN_OP
81505796c8dcSSimon Schubert #undef PUSH_FAILURE_POINTER
81515796c8dcSSimon Schubert #undef PUSH_FAILURE_INT
81525796c8dcSSimon Schubert #undef PUSH_FAILURE_ELT
81535796c8dcSSimon Schubert #undef POP_FAILURE_POINTER
81545796c8dcSSimon Schubert #undef POP_FAILURE_INT
81555796c8dcSSimon Schubert #undef POP_FAILURE_ELT
81565796c8dcSSimon Schubert #undef DEBUG_PUSH
81575796c8dcSSimon Schubert #undef DEBUG_POP
81585796c8dcSSimon Schubert #undef PUSH_FAILURE_POINT
81595796c8dcSSimon Schubert #undef POP_FAILURE_POINT
81605796c8dcSSimon Schubert
81615796c8dcSSimon Schubert #undef REG_UNSET_VALUE
81625796c8dcSSimon Schubert #undef REG_UNSET
81635796c8dcSSimon Schubert
81645796c8dcSSimon Schubert #undef PATFETCH
81655796c8dcSSimon Schubert #undef PATFETCH_RAW
81665796c8dcSSimon Schubert #undef PATUNFETCH
81675796c8dcSSimon Schubert #undef TRANSLATE
81685796c8dcSSimon Schubert
81695796c8dcSSimon Schubert #undef INIT_BUF_SIZE
81705796c8dcSSimon Schubert #undef GET_BUFFER_SPACE
81715796c8dcSSimon Schubert #undef BUF_PUSH
81725796c8dcSSimon Schubert #undef BUF_PUSH_2
81735796c8dcSSimon Schubert #undef BUF_PUSH_3
81745796c8dcSSimon Schubert #undef STORE_JUMP
81755796c8dcSSimon Schubert #undef STORE_JUMP2
81765796c8dcSSimon Schubert #undef INSERT_JUMP
81775796c8dcSSimon Schubert #undef INSERT_JUMP2
81785796c8dcSSimon Schubert #undef EXTEND_BUFFER
81795796c8dcSSimon Schubert #undef GET_UNSIGNED_NUMBER
81805796c8dcSSimon Schubert #undef FREE_STACK_RETURN
81815796c8dcSSimon Schubert
81825796c8dcSSimon Schubert # undef POINTER_TO_OFFSET
81835796c8dcSSimon Schubert # undef MATCHING_IN_FRST_STRING
81845796c8dcSSimon Schubert # undef PREFETCH
81855796c8dcSSimon Schubert # undef AT_STRINGS_BEG
81865796c8dcSSimon Schubert # undef AT_STRINGS_END
81875796c8dcSSimon Schubert # undef WORDCHAR_P
81885796c8dcSSimon Schubert # undef FREE_VAR
81895796c8dcSSimon Schubert # undef FREE_VARIABLES
81905796c8dcSSimon Schubert # undef NO_HIGHEST_ACTIVE_REG
81915796c8dcSSimon Schubert # undef NO_LOWEST_ACTIVE_REG
81925796c8dcSSimon Schubert
81935796c8dcSSimon Schubert # undef CHAR_T
81945796c8dcSSimon Schubert # undef UCHAR_T
81955796c8dcSSimon Schubert # undef COMPILED_BUFFER_VAR
81965796c8dcSSimon Schubert # undef OFFSET_ADDRESS_SIZE
81975796c8dcSSimon Schubert # undef CHAR_CLASS_SIZE
81985796c8dcSSimon Schubert # undef PREFIX
81995796c8dcSSimon Schubert # undef ARG_PREFIX
82005796c8dcSSimon Schubert # undef PUT_CHAR
82015796c8dcSSimon Schubert # undef BYTE
82025796c8dcSSimon Schubert # undef WCHAR
82035796c8dcSSimon Schubert
82045796c8dcSSimon Schubert # define DEFINED_ONCE
8205