xref: /freebsd/lib/libc/regex/regcomp.c (revision f77b5b29)
158f0484fSRodney W. Grimes /*-
28a16b7a1SPedro F. Giffuni  * SPDX-License-Identifier: BSD-3-Clause
38a16b7a1SPedro F. Giffuni  *
458f0484fSRodney W. Grimes  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
558f0484fSRodney W. Grimes  * Copyright (c) 1992, 1993, 1994
658f0484fSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
758f0484fSRodney W. Grimes  *
83c87aa1dSDavid Chisnall  * Copyright (c) 2011 The FreeBSD Foundation
95b5fa75aSEd Maste  *
103c87aa1dSDavid Chisnall  * Portions of this software were developed by David Chisnall
113c87aa1dSDavid Chisnall  * under sponsorship from the FreeBSD Foundation.
123c87aa1dSDavid Chisnall  *
1358f0484fSRodney W. Grimes  * This code is derived from software contributed to Berkeley by
1458f0484fSRodney W. Grimes  * Henry Spencer.
1558f0484fSRodney W. Grimes  *
1658f0484fSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
1758f0484fSRodney W. Grimes  * modification, are permitted provided that the following conditions
1858f0484fSRodney W. Grimes  * are met:
1958f0484fSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
2058f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
2158f0484fSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
2258f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
2358f0484fSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
24fbbd9655SWarner Losh  * 3. Neither the name of the University nor the names of its contributors
2558f0484fSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
2658f0484fSRodney W. Grimes  *    without specific prior written permission.
2758f0484fSRodney W. Grimes  *
2858f0484fSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2958f0484fSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
3058f0484fSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
3158f0484fSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
3258f0484fSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3358f0484fSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3458f0484fSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3558f0484fSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3658f0484fSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3758f0484fSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3858f0484fSRodney W. Grimes  * SUCH DAMAGE.
3958f0484fSRodney W. Grimes  */
4058f0484fSRodney W. Grimes 
4158f0484fSRodney W. Grimes #include <sys/types.h>
4258f0484fSRodney W. Grimes #include <stdio.h>
4358f0484fSRodney W. Grimes #include <string.h>
4458f0484fSRodney W. Grimes #include <ctype.h>
4558f0484fSRodney W. Grimes #include <limits.h>
4658f0484fSRodney W. Grimes #include <stdlib.h>
4758f0484fSRodney W. Grimes #include <regex.h>
4815ae9efaSKyle Evans #include <stdbool.h>
49e5996857STim J. Robbins #include <wchar.h>
50e5996857STim J. Robbins #include <wctype.h>
5158f0484fSRodney W. Grimes 
52fe5bf674SKyle Evans #ifndef LIBREGEX
531daad8f5SAndrey A. Chernov #include "collate.h"
54fe5bf674SKyle Evans #endif
551daad8f5SAndrey A. Chernov 
5658f0484fSRodney W. Grimes #include "utils.h"
5758f0484fSRodney W. Grimes #include "regex2.h"
5858f0484fSRodney W. Grimes 
5958f0484fSRodney W. Grimes #include "cname.h"
6058f0484fSRodney W. Grimes 
6158f0484fSRodney W. Grimes /*
6215ae9efaSKyle Evans  * Branching context, used to keep track of branch state for all of the branch-
6315ae9efaSKyle Evans  * aware functions. In addition to keeping track of branch positions for the
6415ae9efaSKyle Evans  * p_branch_* functions, we use this to simplify some clumsiness in BREs for
6515ae9efaSKyle Evans  * detection of whether ^ is acting as an anchor or being used erroneously and
6615ae9efaSKyle Evans  * also for whether we're in a sub-expression or not.
6715ae9efaSKyle Evans  */
6815ae9efaSKyle Evans struct branchc {
6915ae9efaSKyle Evans 	sopno start;
7015ae9efaSKyle Evans 	sopno back;
7115ae9efaSKyle Evans 	sopno fwd;
7215ae9efaSKyle Evans 
7315ae9efaSKyle Evans 	int nbranch;
7415ae9efaSKyle Evans 	int nchain;
7515ae9efaSKyle Evans 	bool outer;
7615ae9efaSKyle Evans 	bool terminate;
7715ae9efaSKyle Evans };
7815ae9efaSKyle Evans 
7915ae9efaSKyle Evans /*
8058f0484fSRodney W. Grimes  * parse structure, passed up and down to avoid global variables and
8158f0484fSRodney W. Grimes  * other clumsinesses
8258f0484fSRodney W. Grimes  */
8358f0484fSRodney W. Grimes struct parse {
848d0f9a93SPedro F. Giffuni 	const char *next;	/* next character in RE */
858d0f9a93SPedro F. Giffuni 	const char *end;	/* end of string (-> NUL normally) */
8658f0484fSRodney W. Grimes 	int error;		/* has an error been seen? */
8718a1e2e9SKyle Evans 	int gnuext;
8858f0484fSRodney W. Grimes 	sop *strip;		/* malloced strip */
8958f0484fSRodney W. Grimes 	sopno ssize;		/* malloced strip size (allocated) */
9058f0484fSRodney W. Grimes 	sopno slen;		/* malloced strip length (used) */
9158f0484fSRodney W. Grimes 	int ncsalloc;		/* number of csets allocated */
9258f0484fSRodney W. Grimes 	struct re_guts *g;
9358f0484fSRodney W. Grimes #	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
9458f0484fSRodney W. Grimes 	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
9558f0484fSRodney W. Grimes 	sopno pend[NPAREN];	/* -> ) ([0] unused) */
9615ae9efaSKyle Evans 	bool allowbranch;	/* can this expression branch? */
9715ae9efaSKyle Evans 	bool bre;		/* convenience; is this a BRE? */
98adeebf4cSKyle Evans 	int pflags;		/* other parsing flags -- legacy escapes? */
9915ae9efaSKyle Evans 	bool (*parse_expr)(struct parse *, struct branchc *);
10015ae9efaSKyle Evans 	void (*pre_parse)(struct parse *, struct branchc *);
10115ae9efaSKyle Evans 	void (*post_parse)(struct parse *, struct branchc *);
10258f0484fSRodney W. Grimes };
10358f0484fSRodney W. Grimes 
104adeebf4cSKyle Evans #define PFLAG_LEGACY_ESC	0x00000001
105adeebf4cSKyle Evans 
10658f0484fSRodney W. Grimes /* ========= begin header generated by ./mkh ========= */
10758f0484fSRodney W. Grimes #ifdef __cplusplus
10858f0484fSRodney W. Grimes extern "C" {
10958f0484fSRodney W. Grimes #endif
11058f0484fSRodney W. Grimes 
11158f0484fSRodney W. Grimes /* === regcomp.c === */
11215ae9efaSKyle Evans static bool p_ere_exp(struct parse *p, struct branchc *bc);
113c05ac53bSDavid E. O'Brien static void p_str(struct parse *p);
11415ae9efaSKyle Evans static int p_branch_eat_delim(struct parse *p, struct branchc *bc);
11515ae9efaSKyle Evans static void p_branch_ins_offset(struct parse *p, struct branchc *bc);
11615ae9efaSKyle Evans static void p_branch_fix_tail(struct parse *p, struct branchc *bc);
1173ea376f6SKyle Evans static bool p_branch_empty(struct parse *p, struct branchc *bc);
11815ae9efaSKyle Evans static bool p_branch_do(struct parse *p, struct branchc *bc);
11915ae9efaSKyle Evans static void p_bre_pre_parse(struct parse *p, struct branchc *bc);
12015ae9efaSKyle Evans static void p_bre_post_parse(struct parse *p, struct branchc *bc);
12115ae9efaSKyle Evans static void p_re(struct parse *p, int end1, int end2);
12215ae9efaSKyle Evans static bool p_simp_re(struct parse *p, struct branchc *bc);
123c05ac53bSDavid E. O'Brien static int p_count(struct parse *p);
124c05ac53bSDavid E. O'Brien static void p_bracket(struct parse *p);
125fe5bf674SKyle Evans static int p_range_cmp(wchar_t c1, wchar_t c2);
126c05ac53bSDavid E. O'Brien static void p_b_term(struct parse *p, cset *cs);
12718a1e2e9SKyle Evans static int p_b_pseudoclass(struct parse *p, char c);
128c05ac53bSDavid E. O'Brien static void p_b_cclass(struct parse *p, cset *cs);
12918a1e2e9SKyle Evans static void p_b_cclass_named(struct parse *p, cset *cs, const char[]);
130c05ac53bSDavid E. O'Brien static void p_b_eclass(struct parse *p, cset *cs);
131e5996857STim J. Robbins static wint_t p_b_symbol(struct parse *p);
132e5996857STim J. Robbins static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
133adeebf4cSKyle Evans static bool may_escape(struct parse *p, const wint_t ch);
134e5996857STim J. Robbins static wint_t othercase(wint_t ch);
135e5996857STim J. Robbins static void bothcases(struct parse *p, wint_t ch);
136e5996857STim J. Robbins static void ordinary(struct parse *p, wint_t ch);
137c05ac53bSDavid E. O'Brien static void nonnewline(struct parse *p);
138c05ac53bSDavid E. O'Brien static void repeat(struct parse *p, sopno start, int from, int to);
139c05ac53bSDavid E. O'Brien static int seterr(struct parse *p, int e);
140c05ac53bSDavid E. O'Brien static cset *allocset(struct parse *p);
141c05ac53bSDavid E. O'Brien static void freeset(struct parse *p, cset *cs);
142e5996857STim J. Robbins static void CHadd(struct parse *p, cset *cs, wint_t ch);
143e5996857STim J. Robbins static void CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max);
144e5996857STim J. Robbins static void CHaddtype(struct parse *p, cset *cs, wctype_t wct);
145e5996857STim J. Robbins static wint_t singleton(cset *cs);
146c05ac53bSDavid E. O'Brien static sopno dupl(struct parse *p, sopno start, sopno finish);
147c05ac53bSDavid E. O'Brien static void doemit(struct parse *p, sop op, size_t opnd);
148c05ac53bSDavid E. O'Brien static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
149c05ac53bSDavid E. O'Brien static void dofwd(struct parse *p, sopno pos, sop value);
1502bf213ebSKevin Lo static int enlarge(struct parse *p, sopno size);
151c05ac53bSDavid E. O'Brien static void stripsnug(struct parse *p, struct re_guts *g);
152c05ac53bSDavid E. O'Brien static void findmust(struct parse *p, struct re_guts *g);
15380f36366STim J. Robbins static int altoffset(sop *scan, int offset);
154c05ac53bSDavid E. O'Brien static void computejumps(struct parse *p, struct re_guts *g);
155c05ac53bSDavid E. O'Brien static void computematchjumps(struct parse *p, struct re_guts *g);
156c05ac53bSDavid E. O'Brien static sopno pluscount(struct parse *p, struct re_guts *g);
157e5996857STim J. Robbins static wint_t wgetnext(struct parse *p);
15858f0484fSRodney W. Grimes 
15958f0484fSRodney W. Grimes #ifdef __cplusplus
16058f0484fSRodney W. Grimes }
16158f0484fSRodney W. Grimes #endif
16258f0484fSRodney W. Grimes /* ========= end header generated by ./mkh ========= */
16358f0484fSRodney W. Grimes 
16458f0484fSRodney W. Grimes static char nuls[10];		/* place to point scanner in event of error */
16558f0484fSRodney W. Grimes 
16658f0484fSRodney W. Grimes /*
16758f0484fSRodney W. Grimes  * macros for use with parse structure
16858f0484fSRodney W. Grimes  * BEWARE:  these know that the parse structure is named `p' !!!
16958f0484fSRodney W. Grimes  */
17058f0484fSRodney W. Grimes #define	PEEK()	(*p->next)
17158f0484fSRodney W. Grimes #define	PEEK2()	(*(p->next+1))
172d36b5dbeSMiod Vallat #define	MORE()	(p->end - p->next > 0)
173d36b5dbeSMiod Vallat #define	MORE2()	(p->end - p->next > 1)
17458f0484fSRodney W. Grimes #define	SEE(c)	(MORE() && PEEK() == (c))
175d36b5dbeSMiod Vallat #define	SEETWO(a, b)	(MORE2() && PEEK() == (a) && PEEK2() == (b))
17615ae9efaSKyle Evans #define	SEESPEC(a)	(p->bre ? SEETWO('\\', a) : SEE(a))
17758f0484fSRodney W. Grimes #define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
17858f0484fSRodney W. Grimes #define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
17918a1e2e9SKyle Evans #define	EATSPEC(a)	(p->bre ? EATTWO('\\', a) : EAT(a))
18058f0484fSRodney W. Grimes #define	NEXT()	(p->next++)
18158f0484fSRodney W. Grimes #define	NEXT2()	(p->next += 2)
18258f0484fSRodney W. Grimes #define	NEXTn(n)	(p->next += (n))
18358f0484fSRodney W. Grimes #define	GETNEXT()	(*p->next++)
184e5996857STim J. Robbins #define	WGETNEXT()	wgetnext(p)
18558f0484fSRodney W. Grimes #define	SETERROR(e)	seterr(p, (e))
18658f0484fSRodney W. Grimes #define	REQUIRE(co, e)	((co) || SETERROR(e))
18758f0484fSRodney W. Grimes #define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
18858f0484fSRodney W. Grimes #define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))
18958f0484fSRodney W. Grimes #define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))
19058f0484fSRodney W. Grimes #define	EMIT(op, sopnd)	doemit(p, (sop)(op), (size_t)(sopnd))
19158f0484fSRodney W. Grimes #define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
19258f0484fSRodney W. Grimes #define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))
19358f0484fSRodney W. Grimes #define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)
19458f0484fSRodney W. Grimes #define	HERE()		(p->slen)
19558f0484fSRodney W. Grimes #define	THERE()		(p->slen - 1)
19658f0484fSRodney W. Grimes #define	THERETHERE()	(p->slen - 2)
19758f0484fSRodney W. Grimes #define	DROP(n)	(p->slen -= (n))
19858f0484fSRodney W. Grimes 
1996049d9f0SDaniel C. Sobral /* Macro used by computejump()/computematchjump() */
2006049d9f0SDaniel C. Sobral #define MIN(a,b)	((a)<(b)?(a):(b))
2016049d9f0SDaniel C. Sobral 
202adeebf4cSKyle Evans static int				/* 0 success, otherwise REG_something */
regcomp_internal(regex_t * __restrict preg,const char * __restrict pattern,int cflags,int pflags)203adeebf4cSKyle Evans regcomp_internal(regex_t * __restrict preg,
20454a648d1SXin LI 	const char * __restrict pattern,
205adeebf4cSKyle Evans 	int cflags, int pflags)
20658f0484fSRodney W. Grimes {
20758f0484fSRodney W. Grimes 	struct parse pa;
2088fb3f3f6SDavid E. O'Brien 	struct re_guts *g;
2098fb3f3f6SDavid E. O'Brien 	struct parse *p = &pa;
2108fb3f3f6SDavid E. O'Brien 	int i;
2118fb3f3f6SDavid E. O'Brien 	size_t len;
212a89629abSXin LI 	size_t maxlen;
21358f0484fSRodney W. Grimes #ifdef REDEBUG
21458f0484fSRodney W. Grimes #	define	GOODFLAGS(f)	(f)
21558f0484fSRodney W. Grimes #else
21658f0484fSRodney W. Grimes #	define	GOODFLAGS(f)	((f)&~REG_DUMP)
21758f0484fSRodney W. Grimes #endif
21858f0484fSRodney W. Grimes 
21958f0484fSRodney W. Grimes 	cflags = GOODFLAGS(cflags);
22058f0484fSRodney W. Grimes 	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
22158f0484fSRodney W. Grimes 		return(REG_INVARG);
22258f0484fSRodney W. Grimes 
22358f0484fSRodney W. Grimes 	if (cflags&REG_PEND) {
22458f0484fSRodney W. Grimes 		if (preg->re_endp < pattern)
22558f0484fSRodney W. Grimes 			return(REG_INVARG);
22658f0484fSRodney W. Grimes 		len = preg->re_endp - pattern;
22758f0484fSRodney W. Grimes 	} else
2288d0f9a93SPedro F. Giffuni 		len = strlen(pattern);
22958f0484fSRodney W. Grimes 
23058f0484fSRodney W. Grimes 	/* do the mallocs early so failure handling is easy */
23180f36366STim J. Robbins 	g = (struct re_guts *)malloc(sizeof(struct re_guts));
23258f0484fSRodney W. Grimes 	if (g == NULL)
23358f0484fSRodney W. Grimes 		return(REG_ESPACE);
234a89629abSXin LI 	/*
235a89629abSXin LI 	 * Limit the pattern space to avoid a 32-bit overflow on buffer
236a89629abSXin LI 	 * extension.  Also avoid any signed overflow in case of conversion
237a89629abSXin LI 	 * so make the real limit based on a 31-bit overflow.
238a89629abSXin LI 	 *
239a89629abSXin LI 	 * Likely not applicable on 64-bit systems but handle the case
240a89629abSXin LI 	 * generically (who are we to stop people from using ~715MB+
241a89629abSXin LI 	 * patterns?).
242a89629abSXin LI 	 */
243a89629abSXin LI 	maxlen = ((size_t)-1 >> 1) / sizeof(sop) * 2 / 3;
244a89629abSXin LI 	if (len >= maxlen) {
245a89629abSXin LI 		free((char *)g);
246a89629abSXin LI 		return(REG_ESPACE);
247a89629abSXin LI 	}
24858f0484fSRodney W. Grimes 	p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;	/* ugh */
249a89629abSXin LI 	assert(p->ssize >= len);
250a89629abSXin LI 
251e234ddefSPedro F. Giffuni 	p->strip = (sop *)malloc(p->ssize * sizeof(sop));
25258f0484fSRodney W. Grimes 	p->slen = 0;
25358f0484fSRodney W. Grimes 	if (p->strip == NULL) {
25458f0484fSRodney W. Grimes 		free((char *)g);
25558f0484fSRodney W. Grimes 		return(REG_ESPACE);
25658f0484fSRodney W. Grimes 	}
25758f0484fSRodney W. Grimes 
25858f0484fSRodney W. Grimes 	/* set things up */
25958f0484fSRodney W. Grimes 	p->g = g;
2608d0f9a93SPedro F. Giffuni 	p->next = pattern;	/* convenience; we do not modify it */
26158f0484fSRodney W. Grimes 	p->end = p->next + len;
26258f0484fSRodney W. Grimes 	p->error = 0;
26358f0484fSRodney W. Grimes 	p->ncsalloc = 0;
264adeebf4cSKyle Evans 	p->pflags = pflags;
26558f0484fSRodney W. Grimes 	for (i = 0; i < NPAREN; i++) {
26658f0484fSRodney W. Grimes 		p->pbegin[i] = 0;
26758f0484fSRodney W. Grimes 		p->pend[i] = 0;
26858f0484fSRodney W. Grimes 	}
26918a1e2e9SKyle Evans #ifdef LIBREGEX
27018a1e2e9SKyle Evans 	if (cflags&REG_POSIX) {
27118a1e2e9SKyle Evans 		p->gnuext = false;
27218a1e2e9SKyle Evans 		p->allowbranch = (cflags & REG_EXTENDED) != 0;
27318a1e2e9SKyle Evans 	} else
27418a1e2e9SKyle Evans 		p->gnuext = p->allowbranch = true;
27518a1e2e9SKyle Evans #else
27618a1e2e9SKyle Evans 	p->gnuext = false;
27718a1e2e9SKyle Evans 	p->allowbranch = (cflags & REG_EXTENDED) != 0;
27818a1e2e9SKyle Evans #endif
27915ae9efaSKyle Evans 	if (cflags & REG_EXTENDED) {
28015ae9efaSKyle Evans 		p->bre = false;
28115ae9efaSKyle Evans 		p->parse_expr = p_ere_exp;
28215ae9efaSKyle Evans 		p->pre_parse = NULL;
28315ae9efaSKyle Evans 		p->post_parse = NULL;
28415ae9efaSKyle Evans 	} else {
28515ae9efaSKyle Evans 		p->bre = true;
28615ae9efaSKyle Evans 		p->parse_expr = p_simp_re;
28715ae9efaSKyle Evans 		p->pre_parse = p_bre_pre_parse;
28815ae9efaSKyle Evans 		p->post_parse = p_bre_post_parse;
28915ae9efaSKyle Evans 	}
29058f0484fSRodney W. Grimes 	g->sets = NULL;
29158f0484fSRodney W. Grimes 	g->ncsets = 0;
29258f0484fSRodney W. Grimes 	g->cflags = cflags;
29358f0484fSRodney W. Grimes 	g->iflags = 0;
29458f0484fSRodney W. Grimes 	g->nbol = 0;
29558f0484fSRodney W. Grimes 	g->neol = 0;
29658f0484fSRodney W. Grimes 	g->must = NULL;
297e6a886d8SDaniel C. Sobral 	g->moffset = -1;
2986b709b74SDaniel C. Sobral 	g->charjump = NULL;
2996b709b74SDaniel C. Sobral 	g->matchjump = NULL;
30058f0484fSRodney W. Grimes 	g->mlen = 0;
30158f0484fSRodney W. Grimes 	g->nsub = 0;
30258f0484fSRodney W. Grimes 	g->backrefs = 0;
30358f0484fSRodney W. Grimes 
30458f0484fSRodney W. Grimes 	/* do it */
30558f0484fSRodney W. Grimes 	EMIT(OEND, 0);
30658f0484fSRodney W. Grimes 	g->firststate = THERE();
30715ae9efaSKyle Evans 	if (cflags & REG_NOSPEC)
30858f0484fSRodney W. Grimes 		p_str(p);
30958f0484fSRodney W. Grimes 	else
31015ae9efaSKyle Evans 		p_re(p, OUT, OUT);
31158f0484fSRodney W. Grimes 	EMIT(OEND, 0);
31258f0484fSRodney W. Grimes 	g->laststate = THERE();
31358f0484fSRodney W. Grimes 
31458f0484fSRodney W. Grimes 	/* tidy up loose ends and fill things in */
31558f0484fSRodney W. Grimes 	stripsnug(p, g);
31658f0484fSRodney W. Grimes 	findmust(p, g);
3176049d9f0SDaniel C. Sobral 	/* only use Boyer-Moore algorithm if the pattern is bigger
3186049d9f0SDaniel C. Sobral 	 * than three characters
3196049d9f0SDaniel C. Sobral 	 */
3206049d9f0SDaniel C. Sobral 	if(g->mlen > 3) {
3216049d9f0SDaniel C. Sobral 		computejumps(p, g);
3226049d9f0SDaniel C. Sobral 		computematchjumps(p, g);
3234d41cae8SDaniel C. Sobral 		if(g->matchjump == NULL && g->charjump != NULL) {
324619f455bSCorinna Vinschen 			free(&g->charjump[CHAR_MIN]);
3256049d9f0SDaniel C. Sobral 			g->charjump = NULL;
3266049d9f0SDaniel C. Sobral 		}
3276049d9f0SDaniel C. Sobral 	}
32858f0484fSRodney W. Grimes 	g->nplus = pluscount(p, g);
32958f0484fSRodney W. Grimes 	g->magic = MAGIC2;
33058f0484fSRodney W. Grimes 	preg->re_nsub = g->nsub;
33158f0484fSRodney W. Grimes 	preg->re_g = g;
33258f0484fSRodney W. Grimes 	preg->re_magic = MAGIC1;
33358f0484fSRodney W. Grimes #ifndef REDEBUG
33458f0484fSRodney W. Grimes 	/* not debugging, so can't rely on the assert() in regexec() */
33558f0484fSRodney W. Grimes 	if (g->iflags&BAD)
33658f0484fSRodney W. Grimes 		SETERROR(REG_ASSERT);
33758f0484fSRodney W. Grimes #endif
33858f0484fSRodney W. Grimes 
33958f0484fSRodney W. Grimes 	/* win or lose, we're done */
34058f0484fSRodney W. Grimes 	if (p->error != 0)	/* lose */
34158f0484fSRodney W. Grimes 		regfree(preg);
34258f0484fSRodney W. Grimes 	return(p->error);
34358f0484fSRodney W. Grimes }
34458f0484fSRodney W. Grimes 
34558f0484fSRodney W. Grimes /*
346adeebf4cSKyle Evans  - regcomp - interface for parser and compilation
347adeebf4cSKyle Evans  = extern int regcomp(regex_t *, const char *, int);
348adeebf4cSKyle Evans  = #define	REG_BASIC	0000
349adeebf4cSKyle Evans  = #define	REG_EXTENDED	0001
350adeebf4cSKyle Evans  = #define	REG_ICASE	0002
351adeebf4cSKyle Evans  = #define	REG_NOSUB	0004
352adeebf4cSKyle Evans  = #define	REG_NEWLINE	0010
353adeebf4cSKyle Evans  = #define	REG_NOSPEC	0020
354adeebf4cSKyle Evans  = #define	REG_PEND	0040
355adeebf4cSKyle Evans  = #define	REG_DUMP	0200
356adeebf4cSKyle Evans  */
357adeebf4cSKyle Evans int				/* 0 success, otherwise REG_something */
regcomp(regex_t * __restrict preg,const char * __restrict pattern,int cflags)358adeebf4cSKyle Evans regcomp(regex_t * __restrict preg,
359adeebf4cSKyle Evans 	const char * __restrict pattern,
360adeebf4cSKyle Evans 	int cflags)
361adeebf4cSKyle Evans {
362adeebf4cSKyle Evans 
363adeebf4cSKyle Evans 	return (regcomp_internal(preg, pattern, cflags, 0));
364adeebf4cSKyle Evans }
365adeebf4cSKyle Evans 
366adeebf4cSKyle Evans #ifndef LIBREGEX
367adeebf4cSKyle Evans /*
368adeebf4cSKyle Evans  * Legacy interface that requires more lax escaping behavior.
369adeebf4cSKyle Evans  */
370adeebf4cSKyle Evans int
freebsd12_regcomp(regex_t * __restrict preg,const char * __restrict pattern,int cflags,int pflags)371adeebf4cSKyle Evans freebsd12_regcomp(regex_t * __restrict preg,
372adeebf4cSKyle Evans 	const char * __restrict pattern,
373adeebf4cSKyle Evans 	int cflags, int pflags)
374adeebf4cSKyle Evans {
375adeebf4cSKyle Evans 
376adeebf4cSKyle Evans 	return (regcomp_internal(preg, pattern, cflags, PFLAG_LEGACY_ESC));
377adeebf4cSKyle Evans }
378adeebf4cSKyle Evans 
379adeebf4cSKyle Evans __sym_compat(regcomp, freebsd12_regcomp, FBSD_1.0);
380adeebf4cSKyle Evans #endif	/* !LIBREGEX */
381adeebf4cSKyle Evans 
382adeebf4cSKyle Evans /*
38315ae9efaSKyle Evans  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op,
38415ae9efaSKyle Evans  - return whether we should terminate or not
38515ae9efaSKyle Evans  == static bool p_ere_exp(struct parse *p);
38658f0484fSRodney W. Grimes  */
38715ae9efaSKyle Evans static bool
p_ere_exp(struct parse * p,struct branchc * bc)38815ae9efaSKyle Evans p_ere_exp(struct parse *p, struct branchc *bc)
38958f0484fSRodney W. Grimes {
3908fb3f3f6SDavid E. O'Brien 	char c;
391e5996857STim J. Robbins 	wint_t wc;
3928fb3f3f6SDavid E. O'Brien 	sopno pos;
3938fb3f3f6SDavid E. O'Brien 	int count;
3948fb3f3f6SDavid E. O'Brien 	int count2;
39518a1e2e9SKyle Evans #ifdef LIBREGEX
39618a1e2e9SKyle Evans 	int i;
39718a1e2e9SKyle Evans 	int handled;
39818a1e2e9SKyle Evans #endif
3998fb3f3f6SDavid E. O'Brien 	sopno subno;
40058f0484fSRodney W. Grimes 	int wascaret = 0;
40158f0484fSRodney W. Grimes 
4024f8f1c79SKyle Evans 	(void)bc;
40358f0484fSRodney W. Grimes 	assert(MORE());		/* caller should have ensured this */
40458f0484fSRodney W. Grimes 	c = GETNEXT();
40558f0484fSRodney W. Grimes 
40618a1e2e9SKyle Evans #ifdef LIBREGEX
40718a1e2e9SKyle Evans 	handled = 0;
40818a1e2e9SKyle Evans #endif
40958f0484fSRodney W. Grimes 	pos = HERE();
41058f0484fSRodney W. Grimes 	switch (c) {
41158f0484fSRodney W. Grimes 	case '(':
41251295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EPAREN);
41358f0484fSRodney W. Grimes 		p->g->nsub++;
41458f0484fSRodney W. Grimes 		subno = p->g->nsub;
41558f0484fSRodney W. Grimes 		if (subno < NPAREN)
41658f0484fSRodney W. Grimes 			p->pbegin[subno] = HERE();
41758f0484fSRodney W. Grimes 		EMIT(OLPAREN, subno);
41858f0484fSRodney W. Grimes 		if (!SEE(')'))
41915ae9efaSKyle Evans 			p_re(p, ')', IGN);
42058f0484fSRodney W. Grimes 		if (subno < NPAREN) {
42158f0484fSRodney W. Grimes 			p->pend[subno] = HERE();
42258f0484fSRodney W. Grimes 			assert(p->pend[subno] != 0);
42358f0484fSRodney W. Grimes 		}
42458f0484fSRodney W. Grimes 		EMIT(ORPAREN, subno);
42551295a4dSJordan K. Hubbard 		(void)MUSTEAT(')', REG_EPAREN);
42658f0484fSRodney W. Grimes 		break;
42758f0484fSRodney W. Grimes #ifndef POSIX_MISTAKE
42858f0484fSRodney W. Grimes 	case ')':		/* happens only if no current unmatched ( */
42958f0484fSRodney W. Grimes 		/*
43058f0484fSRodney W. Grimes 		 * You may ask, why the ifndef?  Because I didn't notice
43158f0484fSRodney W. Grimes 		 * this until slightly too late for 1003.2, and none of the
43258f0484fSRodney W. Grimes 		 * other 1003.2 regular-expression reviewers noticed it at
43358f0484fSRodney W. Grimes 		 * all.  So an unmatched ) is legal POSIX, at least until
43458f0484fSRodney W. Grimes 		 * we can get it fixed.
43558f0484fSRodney W. Grimes 		 */
43658f0484fSRodney W. Grimes 		SETERROR(REG_EPAREN);
43758f0484fSRodney W. Grimes 		break;
43858f0484fSRodney W. Grimes #endif
43958f0484fSRodney W. Grimes 	case '^':
44058f0484fSRodney W. Grimes 		EMIT(OBOL, 0);
44158f0484fSRodney W. Grimes 		p->g->iflags |= USEBOL;
44258f0484fSRodney W. Grimes 		p->g->nbol++;
44358f0484fSRodney W. Grimes 		wascaret = 1;
44458f0484fSRodney W. Grimes 		break;
44558f0484fSRodney W. Grimes 	case '$':
44658f0484fSRodney W. Grimes 		EMIT(OEOL, 0);
44758f0484fSRodney W. Grimes 		p->g->iflags |= USEEOL;
44858f0484fSRodney W. Grimes 		p->g->neol++;
44958f0484fSRodney W. Grimes 		break;
45058f0484fSRodney W. Grimes 	case '|':
45158f0484fSRodney W. Grimes 		SETERROR(REG_EMPTY);
45258f0484fSRodney W. Grimes 		break;
45358f0484fSRodney W. Grimes 	case '*':
45458f0484fSRodney W. Grimes 	case '+':
45558f0484fSRodney W. Grimes 	case '?':
456*f77b5b29SSimon J. Gerraty #ifndef NO_STRICT_REGEX
457a4a80168SKyle Evans 	case '{':
458*f77b5b29SSimon J. Gerraty #endif
45958f0484fSRodney W. Grimes 		SETERROR(REG_BADRPT);
46058f0484fSRodney W. Grimes 		break;
46158f0484fSRodney W. Grimes 	case '.':
46258f0484fSRodney W. Grimes 		if (p->g->cflags&REG_NEWLINE)
46358f0484fSRodney W. Grimes 			nonnewline(p);
46458f0484fSRodney W. Grimes 		else
46558f0484fSRodney W. Grimes 			EMIT(OANY, 0);
46658f0484fSRodney W. Grimes 		break;
46758f0484fSRodney W. Grimes 	case '[':
46858f0484fSRodney W. Grimes 		p_bracket(p);
46958f0484fSRodney W. Grimes 		break;
47058f0484fSRodney W. Grimes 	case '\\':
47151295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EESCAPE);
472e5996857STim J. Robbins 		wc = WGETNEXT();
47318a1e2e9SKyle Evans #ifdef LIBREGEX
47418a1e2e9SKyle Evans 		if (p->gnuext) {
47518a1e2e9SKyle Evans 			handled = 1;
47618a1e2e9SKyle Evans 			switch (wc) {
477ca53e5aeSKyle Evans 			case '`':
478ca53e5aeSKyle Evans 				EMIT(OBOS, 0);
479ca53e5aeSKyle Evans 				break;
480ca53e5aeSKyle Evans 			case '\'':
481ca53e5aeSKyle Evans 				EMIT(OEOS, 0);
482ca53e5aeSKyle Evans 				break;
4836b986646SKyle Evans 			case 'B':
4846b986646SKyle Evans 				EMIT(ONWBND, 0);
4856b986646SKyle Evans 				break;
4866b986646SKyle Evans 			case 'b':
4876b986646SKyle Evans 				EMIT(OWBND, 0);
4886b986646SKyle Evans 				break;
48918a1e2e9SKyle Evans 			case 'W':
49018a1e2e9SKyle Evans 			case 'w':
49118a1e2e9SKyle Evans 			case 'S':
49218a1e2e9SKyle Evans 			case 's':
49318a1e2e9SKyle Evans 				p_b_pseudoclass(p, wc);
49418a1e2e9SKyle Evans 				break;
49518a1e2e9SKyle Evans 			case '1':
49618a1e2e9SKyle Evans 			case '2':
49718a1e2e9SKyle Evans 			case '3':
49818a1e2e9SKyle Evans 			case '4':
49918a1e2e9SKyle Evans 			case '5':
50018a1e2e9SKyle Evans 			case '6':
50118a1e2e9SKyle Evans 			case '7':
50218a1e2e9SKyle Evans 			case '8':
50318a1e2e9SKyle Evans 			case '9':
50418a1e2e9SKyle Evans 				i = wc - '0';
50518a1e2e9SKyle Evans 				assert(i < NPAREN);
50618a1e2e9SKyle Evans 				if (p->pend[i] != 0) {
50718a1e2e9SKyle Evans 					assert(i <= p->g->nsub);
50818a1e2e9SKyle Evans 					EMIT(OBACK_, i);
50918a1e2e9SKyle Evans 					assert(p->pbegin[i] != 0);
51018a1e2e9SKyle Evans 					assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
51118a1e2e9SKyle Evans 					assert(OP(p->strip[p->pend[i]]) == ORPAREN);
51218a1e2e9SKyle Evans 					(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
51318a1e2e9SKyle Evans 					EMIT(O_BACK, i);
51418a1e2e9SKyle Evans 				} else
51518a1e2e9SKyle Evans 					SETERROR(REG_ESUBREG);
51618a1e2e9SKyle Evans 				p->g->backrefs = 1;
51718a1e2e9SKyle Evans 				break;
51818a1e2e9SKyle Evans 			default:
51918a1e2e9SKyle Evans 				handled = 0;
52018a1e2e9SKyle Evans 			}
52118a1e2e9SKyle Evans 			/* Don't proceed to the POSIX bits if we've already handled it */
52218a1e2e9SKyle Evans 			if (handled)
52318a1e2e9SKyle Evans 				break;
52418a1e2e9SKyle Evans 		}
52518a1e2e9SKyle Evans #endif
5266e283683SPedro F. Giffuni 		switch (wc) {
5276e283683SPedro F. Giffuni 		case '<':
5286e283683SPedro F. Giffuni 			EMIT(OBOW, 0);
5296e283683SPedro F. Giffuni 			break;
5306e283683SPedro F. Giffuni 		case '>':
5316e283683SPedro F. Giffuni 			EMIT(OEOW, 0);
5326e283683SPedro F. Giffuni 			break;
5336e283683SPedro F. Giffuni 		default:
534adeebf4cSKyle Evans 			if (may_escape(p, wc))
535e5996857STim J. Robbins 				ordinary(p, wc);
536adeebf4cSKyle Evans 			else
537adeebf4cSKyle Evans 				SETERROR(REG_EESCAPE);
53858f0484fSRodney W. Grimes 			break;
5396e283683SPedro F. Giffuni 		}
5406e283683SPedro F. Giffuni 		break;
541*f77b5b29SSimon J. Gerraty #ifdef NO_STRICT_REGEX
542*f77b5b29SSimon J. Gerraty 	case '{':               /* okay as ordinary except if digit follows */
543*f77b5b29SSimon J. Gerraty 	    (void)REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
544*f77b5b29SSimon J. Gerraty 	    /* FALLTHROUGH */
545*f77b5b29SSimon J. Gerraty #endif
54658f0484fSRodney W. Grimes 	default:
5479806ef78SBrooks Davis 		if (p->error != 0)
54815ae9efaSKyle Evans 			return (false);
549e5996857STim J. Robbins 		p->next--;
550e5996857STim J. Robbins 		wc = WGETNEXT();
551e5996857STim J. Robbins 		ordinary(p, wc);
55258f0484fSRodney W. Grimes 		break;
55358f0484fSRodney W. Grimes 	}
55458f0484fSRodney W. Grimes 
55558f0484fSRodney W. Grimes 	if (!MORE())
55615ae9efaSKyle Evans 		return (false);
55758f0484fSRodney W. Grimes 	c = PEEK();
55858f0484fSRodney W. Grimes 	/* we call { a repetition if followed by a digit */
559*f77b5b29SSimon J. Gerraty 	if (!( c == '*' || c == '+' || c == '?' ||
560*f77b5b29SSimon J. Gerraty #ifdef NO_STRICT_REGEX
561*f77b5b29SSimon J. Gerraty 	       (c == '{' && MORE2() && isdigit((uch)PEEK2()))
562*f77b5b29SSimon J. Gerraty #else
563*f77b5b29SSimon J. Gerraty 	       c == '{'
564*f77b5b29SSimon J. Gerraty #endif
565*f77b5b29SSimon J. Gerraty 	       ))
56615ae9efaSKyle Evans 		return (false);		/* no repetition, we're done */
567*f77b5b29SSimon J. Gerraty #ifndef NO_STRICT_REGEX
568a4a80168SKyle Evans 	else if (c == '{')
569a4a80168SKyle Evans 		(void)REQUIRE(MORE2() && \
570a4a80168SKyle Evans 		    (isdigit((uch)PEEK2()) || PEEK2() == ','), REG_BADRPT);
571*f77b5b29SSimon J. Gerraty #endif
57258f0484fSRodney W. Grimes 	NEXT();
57358f0484fSRodney W. Grimes 
57451295a4dSJordan K. Hubbard 	(void)REQUIRE(!wascaret, REG_BADRPT);
57558f0484fSRodney W. Grimes 	switch (c) {
57658f0484fSRodney W. Grimes 	case '*':	/* implemented as +? */
57758f0484fSRodney W. Grimes 		/* this case does not require the (y|) trick, noKLUDGE */
57858f0484fSRodney W. Grimes 		INSERT(OPLUS_, pos);
57958f0484fSRodney W. Grimes 		ASTERN(O_PLUS, pos);
58058f0484fSRodney W. Grimes 		INSERT(OQUEST_, pos);
58158f0484fSRodney W. Grimes 		ASTERN(O_QUEST, pos);
58258f0484fSRodney W. Grimes 		break;
58358f0484fSRodney W. Grimes 	case '+':
58458f0484fSRodney W. Grimes 		INSERT(OPLUS_, pos);
58558f0484fSRodney W. Grimes 		ASTERN(O_PLUS, pos);
58658f0484fSRodney W. Grimes 		break;
58758f0484fSRodney W. Grimes 	case '?':
58858f0484fSRodney W. Grimes 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
58958f0484fSRodney W. Grimes 		INSERT(OCH_, pos);		/* offset slightly wrong */
59058f0484fSRodney W. Grimes 		ASTERN(OOR1, pos);		/* this one's right */
59158f0484fSRodney W. Grimes 		AHEAD(pos);			/* fix the OCH_ */
59258f0484fSRodney W. Grimes 		EMIT(OOR2, 0);			/* offset very wrong... */
59358f0484fSRodney W. Grimes 		AHEAD(THERE());			/* ...so fix it */
59458f0484fSRodney W. Grimes 		ASTERN(O_CH, THERETHERE());
59558f0484fSRodney W. Grimes 		break;
59658f0484fSRodney W. Grimes 	case '{':
59758f0484fSRodney W. Grimes 		count = p_count(p);
59858f0484fSRodney W. Grimes 		if (EAT(',')) {
59989b86d02SAndrey A. Chernov 			if (isdigit((uch)PEEK())) {
60058f0484fSRodney W. Grimes 				count2 = p_count(p);
60151295a4dSJordan K. Hubbard 				(void)REQUIRE(count <= count2, REG_BADBR);
60258f0484fSRodney W. Grimes 			} else		/* single number with comma */
60358f0484fSRodney W. Grimes 				count2 = INFINITY;
60458f0484fSRodney W. Grimes 		} else		/* just a single number */
60558f0484fSRodney W. Grimes 			count2 = count;
60658f0484fSRodney W. Grimes 		repeat(p, pos, count, count2);
60758f0484fSRodney W. Grimes 		if (!EAT('}')) {	/* error heuristics */
60858f0484fSRodney W. Grimes 			while (MORE() && PEEK() != '}')
60958f0484fSRodney W. Grimes 				NEXT();
61051295a4dSJordan K. Hubbard 			(void)REQUIRE(MORE(), REG_EBRACE);
61158f0484fSRodney W. Grimes 			SETERROR(REG_BADBR);
61258f0484fSRodney W. Grimes 		}
61358f0484fSRodney W. Grimes 		break;
61458f0484fSRodney W. Grimes 	}
61558f0484fSRodney W. Grimes 
61658f0484fSRodney W. Grimes 	if (!MORE())
61715ae9efaSKyle Evans 		return (false);
61858f0484fSRodney W. Grimes 	c = PEEK();
61958f0484fSRodney W. Grimes 	if (!( c == '*' || c == '+' || c == '?' ||
62089b86d02SAndrey A. Chernov 				(c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
62115ae9efaSKyle Evans 		return (false);
62258f0484fSRodney W. Grimes 	SETERROR(REG_BADRPT);
62315ae9efaSKyle Evans 	return (false);
62458f0484fSRodney W. Grimes }
62558f0484fSRodney W. Grimes 
62658f0484fSRodney W. Grimes /*
62758f0484fSRodney W. Grimes  - p_str - string (no metacharacters) "parser"
6288fb3f3f6SDavid E. O'Brien  == static void p_str(struct parse *p);
62958f0484fSRodney W. Grimes  */
63058f0484fSRodney W. Grimes static void
p_str(struct parse * p)63154a648d1SXin LI p_str(struct parse *p)
63258f0484fSRodney W. Grimes {
63351295a4dSJordan K. Hubbard 	(void)REQUIRE(MORE(), REG_EMPTY);
63458f0484fSRodney W. Grimes 	while (MORE())
635e5996857STim J. Robbins 		ordinary(p, WGETNEXT());
63658f0484fSRodney W. Grimes }
63758f0484fSRodney W. Grimes 
63858f0484fSRodney W. Grimes /*
63915ae9efaSKyle Evans  * Eat consecutive branch delimiters for the kind of expression that we are
64015ae9efaSKyle Evans  * parsing, return the number of delimiters that we ate.
64115ae9efaSKyle Evans  */
64215ae9efaSKyle Evans static int
p_branch_eat_delim(struct parse * p,struct branchc * bc)64315ae9efaSKyle Evans p_branch_eat_delim(struct parse *p, struct branchc *bc)
64415ae9efaSKyle Evans {
64515ae9efaSKyle Evans 	int nskip;
64615ae9efaSKyle Evans 
6474f8f1c79SKyle Evans 	(void)bc;
64815ae9efaSKyle Evans 	nskip = 0;
64918a1e2e9SKyle Evans 	while (EATSPEC('|'))
65015ae9efaSKyle Evans 		++nskip;
65115ae9efaSKyle Evans 	return (nskip);
65215ae9efaSKyle Evans }
65315ae9efaSKyle Evans 
65415ae9efaSKyle Evans /*
65515ae9efaSKyle Evans  * Insert necessary branch book-keeping operations. This emits a
65615ae9efaSKyle Evans  * bogus 'next' offset, since we still have more to parse
65715ae9efaSKyle Evans  */
65815ae9efaSKyle Evans static void
p_branch_ins_offset(struct parse * p,struct branchc * bc)65915ae9efaSKyle Evans p_branch_ins_offset(struct parse *p, struct branchc *bc)
66015ae9efaSKyle Evans {
66115ae9efaSKyle Evans 
66215ae9efaSKyle Evans 	if (bc->nbranch == 0) {
66315ae9efaSKyle Evans 		INSERT(OCH_, bc->start);	/* offset is wrong */
66415ae9efaSKyle Evans 		bc->fwd = bc->start;
66515ae9efaSKyle Evans 		bc->back = bc->start;
66615ae9efaSKyle Evans 	}
66715ae9efaSKyle Evans 
66815ae9efaSKyle Evans 	ASTERN(OOR1, bc->back);
66915ae9efaSKyle Evans 	bc->back = THERE();
67015ae9efaSKyle Evans 	AHEAD(bc->fwd);			/* fix previous offset */
67115ae9efaSKyle Evans 	bc->fwd = HERE();
67215ae9efaSKyle Evans 	EMIT(OOR2, 0);			/* offset is very wrong */
67315ae9efaSKyle Evans 	++bc->nbranch;
67415ae9efaSKyle Evans }
67515ae9efaSKyle Evans 
67615ae9efaSKyle Evans /*
67715ae9efaSKyle Evans  * Fix the offset of the tail branch, if we actually had any branches.
67815ae9efaSKyle Evans  * This is to correct the bogus placeholder offset that we use.
67915ae9efaSKyle Evans  */
68015ae9efaSKyle Evans static void
p_branch_fix_tail(struct parse * p,struct branchc * bc)68115ae9efaSKyle Evans p_branch_fix_tail(struct parse *p, struct branchc *bc)
68215ae9efaSKyle Evans {
68315ae9efaSKyle Evans 
68415ae9efaSKyle Evans 	/* Fix bogus offset at the tail if we actually have branches */
68515ae9efaSKyle Evans 	if (bc->nbranch > 0) {
68615ae9efaSKyle Evans 		AHEAD(bc->fwd);
68715ae9efaSKyle Evans 		ASTERN(O_CH, bc->back);
68815ae9efaSKyle Evans 	}
68915ae9efaSKyle Evans }
69015ae9efaSKyle Evans 
69115ae9efaSKyle Evans /*
69215ae9efaSKyle Evans  * Signal to the parser that an empty branch has been encountered; this will,
69315ae9efaSKyle Evans  * in the future, be used to allow for more permissive behavior with empty
6943ea376f6SKyle Evans  * branches. The return value should indicate whether parsing may continue
6953ea376f6SKyle Evans  * or not.
69615ae9efaSKyle Evans  */
6973ea376f6SKyle Evans static bool
p_branch_empty(struct parse * p,struct branchc * bc)69815ae9efaSKyle Evans p_branch_empty(struct parse *p, struct branchc *bc)
69915ae9efaSKyle Evans {
70015ae9efaSKyle Evans 
7014f8f1c79SKyle Evans 	(void)bc;
70215ae9efaSKyle Evans 	SETERROR(REG_EMPTY);
7033ea376f6SKyle Evans 	return (false);
70415ae9efaSKyle Evans }
70515ae9efaSKyle Evans 
70615ae9efaSKyle Evans /*
70715ae9efaSKyle Evans  * Take care of any branching requirements. This includes inserting the
70815ae9efaSKyle Evans  * appropriate branching instructions as well as eating all of the branch
70915ae9efaSKyle Evans  * delimiters until we either run out of pattern or need to parse more pattern.
71015ae9efaSKyle Evans  */
71115ae9efaSKyle Evans static bool
p_branch_do(struct parse * p,struct branchc * bc)71215ae9efaSKyle Evans p_branch_do(struct parse *p, struct branchc *bc)
71315ae9efaSKyle Evans {
71415ae9efaSKyle Evans 	int ate = 0;
71515ae9efaSKyle Evans 
71615ae9efaSKyle Evans 	ate = p_branch_eat_delim(p, bc);
71715ae9efaSKyle Evans 	if (ate == 0)
71815ae9efaSKyle Evans 		return (false);
7193ea376f6SKyle Evans 	else if ((ate > 1 || (bc->outer && !MORE())) && !p_branch_empty(p, bc))
7203ea376f6SKyle Evans 		/*
7213ea376f6SKyle Evans 		 * Halt parsing only if we have an empty branch and p_branch_empty
7223ea376f6SKyle Evans 		 * indicates that we must not continue. In the future, this will not
7233ea376f6SKyle Evans 		 * necessarily be an error.
7243ea376f6SKyle Evans 		 */
7253ea376f6SKyle Evans 		return (false);
72615ae9efaSKyle Evans 	p_branch_ins_offset(p, bc);
72715ae9efaSKyle Evans 
72815ae9efaSKyle Evans 	return (true);
72915ae9efaSKyle Evans }
73015ae9efaSKyle Evans 
73115ae9efaSKyle Evans static void
p_bre_pre_parse(struct parse * p,struct branchc * bc)73215ae9efaSKyle Evans p_bre_pre_parse(struct parse *p, struct branchc *bc)
73315ae9efaSKyle Evans {
73415ae9efaSKyle Evans 
73515ae9efaSKyle Evans 	(void) bc;
73615ae9efaSKyle Evans 	/*
73715ae9efaSKyle Evans 	 * Does not move cleanly into expression parser because of
73815ae9efaSKyle Evans 	 * ordinary interpration of * at the beginning position of
73915ae9efaSKyle Evans 	 * an expression.
74015ae9efaSKyle Evans 	 */
74115ae9efaSKyle Evans 	if (EAT('^')) {
74215ae9efaSKyle Evans 		EMIT(OBOL, 0);
74315ae9efaSKyle Evans 		p->g->iflags |= USEBOL;
74415ae9efaSKyle Evans 		p->g->nbol++;
74515ae9efaSKyle Evans 	}
74615ae9efaSKyle Evans }
74715ae9efaSKyle Evans 
74815ae9efaSKyle Evans static void
p_bre_post_parse(struct parse * p,struct branchc * bc)74915ae9efaSKyle Evans p_bre_post_parse(struct parse *p, struct branchc *bc)
75015ae9efaSKyle Evans {
75115ae9efaSKyle Evans 
75215ae9efaSKyle Evans 	/* Expression is terminating due to EOL token */
75315ae9efaSKyle Evans 	if (bc->terminate) {
75415ae9efaSKyle Evans 		DROP(1);
75515ae9efaSKyle Evans 		EMIT(OEOL, 0);
75615ae9efaSKyle Evans 		p->g->iflags |= USEEOL;
75715ae9efaSKyle Evans 		p->g->neol++;
75815ae9efaSKyle Evans 	}
75915ae9efaSKyle Evans }
76015ae9efaSKyle Evans 
76115ae9efaSKyle Evans /*
76215ae9efaSKyle Evans  - p_re - Top level parser, concatenation and BRE anchoring
76315ae9efaSKyle Evans  == static void p_re(struct parse *p, int end1, int end2);
76458f0484fSRodney W. Grimes  * Giving end1 as OUT essentially eliminates the end1/end2 check.
76558f0484fSRodney W. Grimes  *
76658f0484fSRodney W. Grimes  * This implementation is a bit of a kludge, in that a trailing $ is first
76780f36366STim J. Robbins  * taken as an ordinary character and then revised to be an anchor.
76858f0484fSRodney W. Grimes  * The amount of lookahead needed to avoid this kludge is excessive.
76958f0484fSRodney W. Grimes  */
77058f0484fSRodney W. Grimes static void
p_re(struct parse * p,int end1,int end2)77115ae9efaSKyle Evans p_re(struct parse *p,
7725249ac86SKevin Lo 	int end1,	/* first terminating character */
77315ae9efaSKyle Evans 	int end2)	/* second terminating character; ignored for EREs */
77458f0484fSRodney W. Grimes {
77515ae9efaSKyle Evans 	struct branchc bc;
77658f0484fSRodney W. Grimes 
77715ae9efaSKyle Evans 	bc.nbranch = 0;
77815ae9efaSKyle Evans 	if (end1 == OUT && end2 == OUT)
77915ae9efaSKyle Evans 		bc.outer = true;
78015ae9efaSKyle Evans 	else
78115ae9efaSKyle Evans 		bc.outer = false;
78215ae9efaSKyle Evans #define	SEEEND()	(!p->bre ? SEE(end1) : SEETWO(end1, end2))
78315ae9efaSKyle Evans 	for (;;) {
78415ae9efaSKyle Evans 		bc.start = HERE();
78515ae9efaSKyle Evans 		bc.nchain = 0;
78615ae9efaSKyle Evans 		bc.terminate = false;
78715ae9efaSKyle Evans 		if (p->pre_parse != NULL)
78815ae9efaSKyle Evans 			p->pre_parse(p, &bc);
78979c9a695SKyle Evans 		while (MORE() && (!p->allowbranch || !SEESPEC('|')) && !SEEEND()) {
79015ae9efaSKyle Evans 			bc.terminate = p->parse_expr(p, &bc);
79115ae9efaSKyle Evans 			++bc.nchain;
79258f0484fSRodney W. Grimes 		}
79315ae9efaSKyle Evans 		if (p->post_parse != NULL)
79415ae9efaSKyle Evans 			p->post_parse(p, &bc);
79518a1e2e9SKyle Evans 		(void) REQUIRE(p->gnuext || HERE() != bc.start, REG_EMPTY);
79618a1e2e9SKyle Evans #ifdef LIBREGEX
79718a1e2e9SKyle Evans 		if (HERE() == bc.start && !p_branch_empty(p, &bc))
79818a1e2e9SKyle Evans 			break;
79918a1e2e9SKyle Evans #endif
80015ae9efaSKyle Evans 		if (!p->allowbranch)
80115ae9efaSKyle Evans 			break;
80215ae9efaSKyle Evans 		/*
80315ae9efaSKyle Evans 		 * p_branch_do's return value indicates whether we should
80415ae9efaSKyle Evans 		 * continue parsing or not. This is both for correctness and
80515ae9efaSKyle Evans 		 * a slight optimization, because it will check if we've
80615ae9efaSKyle Evans 		 * encountered an empty branch or the end of the string
80715ae9efaSKyle Evans 		 * immediately following a branch delimiter.
80815ae9efaSKyle Evans 		 */
80915ae9efaSKyle Evans 		if (!p_branch_do(p, &bc))
81015ae9efaSKyle Evans 			break;
81158f0484fSRodney W. Grimes 	}
81215ae9efaSKyle Evans #undef SEE_END
81315ae9efaSKyle Evans 	if (p->allowbranch)
81415ae9efaSKyle Evans 		p_branch_fix_tail(p, &bc);
81515ae9efaSKyle Evans 	assert(!MORE() || SEE(end1));
81658f0484fSRodney W. Grimes }
81758f0484fSRodney W. Grimes 
81858f0484fSRodney W. Grimes /*
81958f0484fSRodney W. Grimes  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
82015ae9efaSKyle Evans  == static bool p_simp_re(struct parse *p, struct branchc *bc);
82158f0484fSRodney W. Grimes  */
82215ae9efaSKyle Evans static bool			/* was the simple RE an unbackslashed $? */
p_simp_re(struct parse * p,struct branchc * bc)82315ae9efaSKyle Evans p_simp_re(struct parse *p, struct branchc *bc)
82458f0484fSRodney W. Grimes {
8258fb3f3f6SDavid E. O'Brien 	int c;
82618a1e2e9SKyle Evans 	int cc;			/* convenient/control character */
8278fb3f3f6SDavid E. O'Brien 	int count;
8288fb3f3f6SDavid E. O'Brien 	int count2;
8298fb3f3f6SDavid E. O'Brien 	sopno pos;
83018a1e2e9SKyle Evans 	bool handled;
8318fb3f3f6SDavid E. O'Brien 	int i;
832e5996857STim J. Robbins 	wint_t wc;
8338fb3f3f6SDavid E. O'Brien 	sopno subno;
83458f0484fSRodney W. Grimes #	define	BACKSL	(1<<CHAR_BIT)
83558f0484fSRodney W. Grimes 
83632223c1bSPedro F. Giffuni 	pos = HERE();		/* repetition op, if any, covers from here */
83718a1e2e9SKyle Evans 	handled = false;
83858f0484fSRodney W. Grimes 
83958f0484fSRodney W. Grimes 	assert(MORE());		/* caller should have ensured this */
8403fb80f14SChristos Zoulas 	c = (uch)GETNEXT();
84158f0484fSRodney W. Grimes 	if (c == '\\') {
84251295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EESCAPE);
8433fb80f14SChristos Zoulas 		cc = (uch)GETNEXT();
84418a1e2e9SKyle Evans 		c = BACKSL | cc;
84518a1e2e9SKyle Evans #ifdef LIBREGEX
84618a1e2e9SKyle Evans 		if (p->gnuext) {
84718a1e2e9SKyle Evans 			handled = true;
84818a1e2e9SKyle Evans 			switch (c) {
849ca53e5aeSKyle Evans 			case BACKSL|'`':
850ca53e5aeSKyle Evans 				EMIT(OBOS, 0);
851ca53e5aeSKyle Evans 				break;
852ca53e5aeSKyle Evans 			case BACKSL|'\'':
853ca53e5aeSKyle Evans 				EMIT(OEOS, 0);
854ca53e5aeSKyle Evans 				break;
8556b986646SKyle Evans 			case BACKSL|'B':
8566b986646SKyle Evans 				EMIT(ONWBND, 0);
8576b986646SKyle Evans 				break;
8586b986646SKyle Evans 			case BACKSL|'b':
8596b986646SKyle Evans 				EMIT(OWBND, 0);
8606b986646SKyle Evans 				break;
86118a1e2e9SKyle Evans 			case BACKSL|'W':
86218a1e2e9SKyle Evans 			case BACKSL|'w':
86318a1e2e9SKyle Evans 			case BACKSL|'S':
86418a1e2e9SKyle Evans 			case BACKSL|'s':
86518a1e2e9SKyle Evans 				p_b_pseudoclass(p, cc);
86618a1e2e9SKyle Evans 				break;
86718a1e2e9SKyle Evans 			default:
86818a1e2e9SKyle Evans 				handled = false;
86958f0484fSRodney W. Grimes 			}
87018a1e2e9SKyle Evans 		}
87118a1e2e9SKyle Evans #endif
87218a1e2e9SKyle Evans 	}
87318a1e2e9SKyle Evans 	if (!handled) {
87458f0484fSRodney W. Grimes 		switch (c) {
87558f0484fSRodney W. Grimes 		case '.':
87658f0484fSRodney W. Grimes 			if (p->g->cflags&REG_NEWLINE)
87758f0484fSRodney W. Grimes 				nonnewline(p);
87858f0484fSRodney W. Grimes 			else
87958f0484fSRodney W. Grimes 				EMIT(OANY, 0);
88058f0484fSRodney W. Grimes 			break;
88158f0484fSRodney W. Grimes 		case '[':
88258f0484fSRodney W. Grimes 			p_bracket(p);
88358f0484fSRodney W. Grimes 			break;
8846e283683SPedro F. Giffuni 		case BACKSL|'<':
8856e283683SPedro F. Giffuni 			EMIT(OBOW, 0);
8866e283683SPedro F. Giffuni 			break;
8876e283683SPedro F. Giffuni 		case BACKSL|'>':
8886e283683SPedro F. Giffuni 			EMIT(OEOW, 0);
8896e283683SPedro F. Giffuni 			break;
89058f0484fSRodney W. Grimes 		case BACKSL|'{':
89158f0484fSRodney W. Grimes 			SETERROR(REG_BADRPT);
89258f0484fSRodney W. Grimes 			break;
89358f0484fSRodney W. Grimes 		case BACKSL|'(':
89458f0484fSRodney W. Grimes 			p->g->nsub++;
89558f0484fSRodney W. Grimes 			subno = p->g->nsub;
89658f0484fSRodney W. Grimes 			if (subno < NPAREN)
89758f0484fSRodney W. Grimes 				p->pbegin[subno] = HERE();
89858f0484fSRodney W. Grimes 			EMIT(OLPAREN, subno);
89958f0484fSRodney W. Grimes 			/* the MORE here is an error heuristic */
90058f0484fSRodney W. Grimes 			if (MORE() && !SEETWO('\\', ')'))
90115ae9efaSKyle Evans 				p_re(p, '\\', ')');
90258f0484fSRodney W. Grimes 			if (subno < NPAREN) {
90358f0484fSRodney W. Grimes 				p->pend[subno] = HERE();
90458f0484fSRodney W. Grimes 				assert(p->pend[subno] != 0);
90558f0484fSRodney W. Grimes 			}
90658f0484fSRodney W. Grimes 			EMIT(ORPAREN, subno);
90751295a4dSJordan K. Hubbard 			(void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
90858f0484fSRodney W. Grimes 			break;
90958f0484fSRodney W. Grimes 		case BACKSL|')':	/* should not get here -- must be user */
910*f77b5b29SSimon J. Gerraty #ifdef NO_STRICT_REGEX
911*f77b5b29SSimon J. Gerraty 		case BACKSL|'}':
912*f77b5b29SSimon J. Gerraty #endif
91358f0484fSRodney W. Grimes 			SETERROR(REG_EPAREN);
91458f0484fSRodney W. Grimes 			break;
91558f0484fSRodney W. Grimes 		case BACKSL|'1':
91658f0484fSRodney W. Grimes 		case BACKSL|'2':
91758f0484fSRodney W. Grimes 		case BACKSL|'3':
91858f0484fSRodney W. Grimes 		case BACKSL|'4':
91958f0484fSRodney W. Grimes 		case BACKSL|'5':
92058f0484fSRodney W. Grimes 		case BACKSL|'6':
92158f0484fSRodney W. Grimes 		case BACKSL|'7':
92258f0484fSRodney W. Grimes 		case BACKSL|'8':
92358f0484fSRodney W. Grimes 		case BACKSL|'9':
92458f0484fSRodney W. Grimes 			i = (c&~BACKSL) - '0';
92558f0484fSRodney W. Grimes 			assert(i < NPAREN);
92658f0484fSRodney W. Grimes 			if (p->pend[i] != 0) {
92758f0484fSRodney W. Grimes 				assert(i <= p->g->nsub);
92858f0484fSRodney W. Grimes 				EMIT(OBACK_, i);
92958f0484fSRodney W. Grimes 				assert(p->pbegin[i] != 0);
93058f0484fSRodney W. Grimes 				assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
93158f0484fSRodney W. Grimes 				assert(OP(p->strip[p->pend[i]]) == ORPAREN);
93258f0484fSRodney W. Grimes 				(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
93358f0484fSRodney W. Grimes 				EMIT(O_BACK, i);
93458f0484fSRodney W. Grimes 			} else
93558f0484fSRodney W. Grimes 				SETERROR(REG_ESUBREG);
93658f0484fSRodney W. Grimes 			p->g->backrefs = 1;
93758f0484fSRodney W. Grimes 			break;
93858f0484fSRodney W. Grimes 		case '*':
93915ae9efaSKyle Evans 			/*
94015ae9efaSKyle Evans 			 * Ordinary if used as the first character beyond BOL anchor of
94115ae9efaSKyle Evans 			 * a (sub-)expression, counts as a bad repetition operator if it
94215ae9efaSKyle Evans 			 * appears otherwise.
94315ae9efaSKyle Evans 			 */
94415ae9efaSKyle Evans 			(void)REQUIRE(bc->nchain == 0, REG_BADRPT);
94558f0484fSRodney W. Grimes 			/* FALLTHROUGH */
94658f0484fSRodney W. Grimes 		default:
9479806ef78SBrooks Davis 			if (p->error != 0)
94815ae9efaSKyle Evans 				return (false);	/* Definitely not $... */
949e5996857STim J. Robbins 			p->next--;
950e5996857STim J. Robbins 			wc = WGETNEXT();
951adeebf4cSKyle Evans 			if ((c & BACKSL) == 0 || may_escape(p, wc))
952e5996857STim J. Robbins 				ordinary(p, wc);
953adeebf4cSKyle Evans 			else
954adeebf4cSKyle Evans 				SETERROR(REG_EESCAPE);
95558f0484fSRodney W. Grimes 			break;
95658f0484fSRodney W. Grimes 		}
95718a1e2e9SKyle Evans 	}
95858f0484fSRodney W. Grimes 
95958f0484fSRodney W. Grimes 	if (EAT('*')) {		/* implemented as +? */
96058f0484fSRodney W. Grimes 		/* this case does not require the (y|) trick, noKLUDGE */
96158f0484fSRodney W. Grimes 		INSERT(OPLUS_, pos);
96258f0484fSRodney W. Grimes 		ASTERN(O_PLUS, pos);
96358f0484fSRodney W. Grimes 		INSERT(OQUEST_, pos);
96458f0484fSRodney W. Grimes 		ASTERN(O_QUEST, pos);
96518a1e2e9SKyle Evans #ifdef LIBREGEX
96618a1e2e9SKyle Evans 	} else if (p->gnuext && EATTWO('\\', '?')) {
96718a1e2e9SKyle Evans 		INSERT(OQUEST_, pos);
96818a1e2e9SKyle Evans 		ASTERN(O_QUEST, pos);
96918a1e2e9SKyle Evans 	} else if (p->gnuext && EATTWO('\\', '+')) {
97018a1e2e9SKyle Evans 		INSERT(OPLUS_, pos);
97118a1e2e9SKyle Evans 		ASTERN(O_PLUS, pos);
97218a1e2e9SKyle Evans #endif
97358f0484fSRodney W. Grimes 	} else if (EATTWO('\\', '{')) {
97458f0484fSRodney W. Grimes 		count = p_count(p);
97558f0484fSRodney W. Grimes 		if (EAT(',')) {
97689b86d02SAndrey A. Chernov 			if (MORE() && isdigit((uch)PEEK())) {
97758f0484fSRodney W. Grimes 				count2 = p_count(p);
97851295a4dSJordan K. Hubbard 				(void)REQUIRE(count <= count2, REG_BADBR);
97958f0484fSRodney W. Grimes 			} else		/* single number with comma */
98058f0484fSRodney W. Grimes 				count2 = INFINITY;
98158f0484fSRodney W. Grimes 		} else		/* just a single number */
98258f0484fSRodney W. Grimes 			count2 = count;
98358f0484fSRodney W. Grimes 		repeat(p, pos, count, count2);
98458f0484fSRodney W. Grimes 		if (!EATTWO('\\', '}')) {	/* error heuristics */
98558f0484fSRodney W. Grimes 			while (MORE() && !SEETWO('\\', '}'))
98658f0484fSRodney W. Grimes 				NEXT();
98751295a4dSJordan K. Hubbard 			(void)REQUIRE(MORE(), REG_EBRACE);
98858f0484fSRodney W. Grimes 			SETERROR(REG_BADBR);
98958f0484fSRodney W. Grimes 		}
99089b86d02SAndrey A. Chernov 	} else if (c == '$')     /* $ (but not \$) ends it */
99115ae9efaSKyle Evans 		return (true);
99258f0484fSRodney W. Grimes 
99315ae9efaSKyle Evans 	return (false);
99458f0484fSRodney W. Grimes }
99558f0484fSRodney W. Grimes 
99658f0484fSRodney W. Grimes /*
99758f0484fSRodney W. Grimes  - p_count - parse a repetition count
9988fb3f3f6SDavid E. O'Brien  == static int p_count(struct parse *p);
99958f0484fSRodney W. Grimes  */
100058f0484fSRodney W. Grimes static int			/* the value */
p_count(struct parse * p)100154a648d1SXin LI p_count(struct parse *p)
100258f0484fSRodney W. Grimes {
10038fb3f3f6SDavid E. O'Brien 	int count = 0;
10048fb3f3f6SDavid E. O'Brien 	int ndigits = 0;
100558f0484fSRodney W. Grimes 
100689b86d02SAndrey A. Chernov 	while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
10073fb80f14SChristos Zoulas 		count = count*10 + ((uch)GETNEXT() - '0');
100858f0484fSRodney W. Grimes 		ndigits++;
100958f0484fSRodney W. Grimes 	}
101058f0484fSRodney W. Grimes 
101151295a4dSJordan K. Hubbard 	(void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
101258f0484fSRodney W. Grimes 	return(count);
101358f0484fSRodney W. Grimes }
101458f0484fSRodney W. Grimes 
101558f0484fSRodney W. Grimes /*
101658f0484fSRodney W. Grimes  - p_bracket - parse a bracketed character list
10178fb3f3f6SDavid E. O'Brien  == static void p_bracket(struct parse *p);
101858f0484fSRodney W. Grimes  */
101958f0484fSRodney W. Grimes static void
p_bracket(struct parse * p)102054a648d1SXin LI p_bracket(struct parse *p)
102158f0484fSRodney W. Grimes {
1022e5996857STim J. Robbins 	cset *cs;
1023e5996857STim J. Robbins 	wint_t ch;
102458f0484fSRodney W. Grimes 
102558f0484fSRodney W. Grimes 	/* Dept of Truly Sickening Special-Case Kludges */
1026d36b5dbeSMiod Vallat 	if (p->end - p->next > 5) {
1027d36b5dbeSMiod Vallat 		if (strncmp(p->next, "[:<:]]", 6) == 0) {
102858f0484fSRodney W. Grimes 			EMIT(OBOW, 0);
102958f0484fSRodney W. Grimes 			NEXTn(6);
103058f0484fSRodney W. Grimes 			return;
103158f0484fSRodney W. Grimes 		}
1032d36b5dbeSMiod Vallat 		if (strncmp(p->next, "[:>:]]", 6) == 0) {
103358f0484fSRodney W. Grimes 			EMIT(OEOW, 0);
103458f0484fSRodney W. Grimes 			NEXTn(6);
103558f0484fSRodney W. Grimes 			return;
103658f0484fSRodney W. Grimes 		}
1037d36b5dbeSMiod Vallat 	}
103858f0484fSRodney W. Grimes 
1039e5996857STim J. Robbins 	if ((cs = allocset(p)) == NULL)
1040e5996857STim J. Robbins 		return;
1041e5996857STim J. Robbins 
1042e5996857STim J. Robbins 	if (p->g->cflags&REG_ICASE)
1043e5996857STim J. Robbins 		cs->icase = 1;
104458f0484fSRodney W. Grimes 	if (EAT('^'))
1045e5996857STim J. Robbins 		cs->invert = 1;
104658f0484fSRodney W. Grimes 	if (EAT(']'))
1047e5996857STim J. Robbins 		CHadd(p, cs, ']');
104858f0484fSRodney W. Grimes 	else if (EAT('-'))
1049e5996857STim J. Robbins 		CHadd(p, cs, '-');
105058f0484fSRodney W. Grimes 	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
105158f0484fSRodney W. Grimes 		p_b_term(p, cs);
105258f0484fSRodney W. Grimes 	if (EAT('-'))
1053e5996857STim J. Robbins 		CHadd(p, cs, '-');
105451295a4dSJordan K. Hubbard 	(void)MUSTEAT(']', REG_EBRACK);
105558f0484fSRodney W. Grimes 
105658f0484fSRodney W. Grimes 	if (p->error != 0)	/* don't mess things up further */
105758f0484fSRodney W. Grimes 		return;
105858f0484fSRodney W. Grimes 
1059e5996857STim J. Robbins 	if (cs->invert && p->g->cflags&REG_NEWLINE)
1060e5996857STim J. Robbins 		cs->bmp['\n' >> 3] |= 1 << ('\n' & 7);
106158f0484fSRodney W. Grimes 
1062e5996857STim J. Robbins 	if ((ch = singleton(cs)) != OUT) {	/* optimize singleton sets */
1063e5996857STim J. Robbins 		ordinary(p, ch);
106458f0484fSRodney W. Grimes 		freeset(p, cs);
106558f0484fSRodney W. Grimes 	} else
1066e5996857STim J. Robbins 		EMIT(OANYOF, (int)(cs - p->g->sets));
106758f0484fSRodney W. Grimes }
106858f0484fSRodney W. Grimes 
1069fe5bf674SKyle Evans static int
p_range_cmp(wchar_t c1,wchar_t c2)1070fe5bf674SKyle Evans p_range_cmp(wchar_t c1, wchar_t c2)
1071fe5bf674SKyle Evans {
1072fe5bf674SKyle Evans #ifndef LIBREGEX
1073fe5bf674SKyle Evans 	return __wcollate_range_cmp(c1, c2);
1074fe5bf674SKyle Evans #else
1075fe5bf674SKyle Evans 	/* Copied from libc/collate __wcollate_range_cmp */
1076fe5bf674SKyle Evans 	wchar_t s1[2], s2[2];
1077fe5bf674SKyle Evans 
1078fe5bf674SKyle Evans 	s1[0] = c1;
1079fe5bf674SKyle Evans 	s1[1] = L'\0';
1080fe5bf674SKyle Evans 	s2[0] = c2;
1081fe5bf674SKyle Evans 	s2[1] = L'\0';
1082fe5bf674SKyle Evans 	return (wcscoll(s1, s2));
1083fe5bf674SKyle Evans #endif
1084fe5bf674SKyle Evans }
1085fe5bf674SKyle Evans 
108658f0484fSRodney W. Grimes /*
108758f0484fSRodney W. Grimes  - p_b_term - parse one term of a bracketed character list
10888fb3f3f6SDavid E. O'Brien  == static void p_b_term(struct parse *p, cset *cs);
108958f0484fSRodney W. Grimes  */
109058f0484fSRodney W. Grimes static void
p_b_term(struct parse * p,cset * cs)109154a648d1SXin LI p_b_term(struct parse *p, cset *cs)
109258f0484fSRodney W. Grimes {
10938fb3f3f6SDavid E. O'Brien 	char c;
1094e5996857STim J. Robbins 	wint_t start, finish;
10951daad8f5SAndrey A. Chernov 	wint_t i;
1096fe5bf674SKyle Evans #ifndef LIBREGEX
10971daad8f5SAndrey A. Chernov 	struct xlocale_collate *table =
10981daad8f5SAndrey A. Chernov 		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
1099fe5bf674SKyle Evans #endif
110058f0484fSRodney W. Grimes 	/* classify what we've got */
110158f0484fSRodney W. Grimes 	switch ((MORE()) ? PEEK() : '\0') {
110258f0484fSRodney W. Grimes 	case '[':
110358f0484fSRodney W. Grimes 		c = (MORE2()) ? PEEK2() : '\0';
110458f0484fSRodney W. Grimes 		break;
110558f0484fSRodney W. Grimes 	case '-':
110658f0484fSRodney W. Grimes 		SETERROR(REG_ERANGE);
110758f0484fSRodney W. Grimes 		return;			/* NOTE RETURN */
110858f0484fSRodney W. Grimes 	default:
110958f0484fSRodney W. Grimes 		c = '\0';
111058f0484fSRodney W. Grimes 		break;
111158f0484fSRodney W. Grimes 	}
111258f0484fSRodney W. Grimes 
111358f0484fSRodney W. Grimes 	switch (c) {
111458f0484fSRodney W. Grimes 	case ':':		/* character class */
111558f0484fSRodney W. Grimes 		NEXT2();
111651295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EBRACK);
111758f0484fSRodney W. Grimes 		c = PEEK();
111851295a4dSJordan K. Hubbard 		(void)REQUIRE(c != '-' && c != ']', REG_ECTYPE);
111958f0484fSRodney W. Grimes 		p_b_cclass(p, cs);
112051295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EBRACK);
112151295a4dSJordan K. Hubbard 		(void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
112258f0484fSRodney W. Grimes 		break;
112358f0484fSRodney W. Grimes 	case '=':		/* equivalence class */
112458f0484fSRodney W. Grimes 		NEXT2();
112551295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EBRACK);
112658f0484fSRodney W. Grimes 		c = PEEK();
112751295a4dSJordan K. Hubbard 		(void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
112858f0484fSRodney W. Grimes 		p_b_eclass(p, cs);
112951295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EBRACK);
113051295a4dSJordan K. Hubbard 		(void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
113158f0484fSRodney W. Grimes 		break;
113258f0484fSRodney W. Grimes 	default:		/* symbol, ordinary character, or range */
113358f0484fSRodney W. Grimes 		start = p_b_symbol(p);
113458f0484fSRodney W. Grimes 		if (SEE('-') && MORE2() && PEEK2() != ']') {
113558f0484fSRodney W. Grimes 			/* range */
113658f0484fSRodney W. Grimes 			NEXT();
113758f0484fSRodney W. Grimes 			if (EAT('-'))
113858f0484fSRodney W. Grimes 				finish = '-';
113958f0484fSRodney W. Grimes 			else
114058f0484fSRodney W. Grimes 				finish = p_b_symbol(p);
114158f0484fSRodney W. Grimes 		} else
114258f0484fSRodney W. Grimes 			finish = start;
11435c551438SAndrey A. Chernov 		if (start == finish)
1144e5996857STim J. Robbins 			CHadd(p, cs, start);
11455c551438SAndrey A. Chernov 		else {
1146fe5bf674SKyle Evans #ifndef LIBREGEX
114712eae8c8SAndrey A. Chernov 			if (table->__collate_load_error || MB_CUR_MAX > 1) {
1148fe5bf674SKyle Evans #else
1149fe5bf674SKyle Evans 			if (MB_CUR_MAX > 1) {
1150fe5bf674SKyle Evans #endif
115112eae8c8SAndrey A. Chernov 				(void)REQUIRE(start <= finish, REG_ERANGE);
1152e5996857STim J. Robbins 				CHaddrange(p, cs, start, finish);
11531daad8f5SAndrey A. Chernov 			} else {
1154fe5bf674SKyle Evans 				(void)REQUIRE(p_range_cmp(start, finish) <= 0, REG_ERANGE);
11551daad8f5SAndrey A. Chernov 				for (i = 0; i <= UCHAR_MAX; i++) {
1156fe5bf674SKyle Evans 					if (p_range_cmp(start, i) <= 0 &&
1157fe5bf674SKyle Evans 					    p_range_cmp(i, finish) <= 0 )
11581daad8f5SAndrey A. Chernov 						CHadd(p, cs, i);
11591daad8f5SAndrey A. Chernov 				}
11601daad8f5SAndrey A. Chernov 			}
1161b5a6eb18SAndrey A. Chernov 		}
116258f0484fSRodney W. Grimes 		break;
116358f0484fSRodney W. Grimes 	}
116458f0484fSRodney W. Grimes }
116558f0484fSRodney W. Grimes 
116658f0484fSRodney W. Grimes /*
116718a1e2e9SKyle Evans  - p_b_pseudoclass - parse a pseudo-class (\w, \W, \s, \S)
116818a1e2e9SKyle Evans  == static int p_b_pseudoclass(struct parse *p, char c)
116918a1e2e9SKyle Evans  */
117018a1e2e9SKyle Evans static int
117118a1e2e9SKyle Evans p_b_pseudoclass(struct parse *p, char c) {
117218a1e2e9SKyle Evans 	cset *cs;
117318a1e2e9SKyle Evans 
117418a1e2e9SKyle Evans 	if ((cs = allocset(p)) == NULL)
117518a1e2e9SKyle Evans 		return(0);
117618a1e2e9SKyle Evans 
117718a1e2e9SKyle Evans 	if (p->g->cflags&REG_ICASE)
117818a1e2e9SKyle Evans 		cs->icase = 1;
117918a1e2e9SKyle Evans 
118018a1e2e9SKyle Evans 	switch (c) {
118118a1e2e9SKyle Evans 	case 'W':
118218a1e2e9SKyle Evans 		cs->invert = 1;
118318a1e2e9SKyle Evans 		/* PASSTHROUGH */
118418a1e2e9SKyle Evans 	case 'w':
118518a1e2e9SKyle Evans 		p_b_cclass_named(p, cs, "alnum");
118618a1e2e9SKyle Evans 		break;
118718a1e2e9SKyle Evans 	case 'S':
118818a1e2e9SKyle Evans 		cs->invert = 1;
118918a1e2e9SKyle Evans 		/* PASSTHROUGH */
119018a1e2e9SKyle Evans 	case 's':
119118a1e2e9SKyle Evans 		p_b_cclass_named(p, cs, "space");
119218a1e2e9SKyle Evans 		break;
119318a1e2e9SKyle Evans 	default:
119418a1e2e9SKyle Evans 		return(0);
119518a1e2e9SKyle Evans 	}
119618a1e2e9SKyle Evans 
119718a1e2e9SKyle Evans 	EMIT(OANYOF, (int)(cs - p->g->sets));
119818a1e2e9SKyle Evans 	return(1);
119918a1e2e9SKyle Evans }
120018a1e2e9SKyle Evans 
120118a1e2e9SKyle Evans /*
120258f0484fSRodney W. Grimes  - p_b_cclass - parse a character-class name and deal with it
12038fb3f3f6SDavid E. O'Brien  == static void p_b_cclass(struct parse *p, cset *cs);
120458f0484fSRodney W. Grimes  */
120558f0484fSRodney W. Grimes static void
120654a648d1SXin LI p_b_cclass(struct parse *p, cset *cs)
120758f0484fSRodney W. Grimes {
12088d0f9a93SPedro F. Giffuni 	const char *sp = p->next;
12098fb3f3f6SDavid E. O'Brien 	size_t len;
1210e5996857STim J. Robbins 	char clname[16];
121158f0484fSRodney W. Grimes 
1212b5363c4aSAndrey A. Chernov 	while (MORE() && isalpha((uch)PEEK()))
121358f0484fSRodney W. Grimes 		NEXT();
121458f0484fSRodney W. Grimes 	len = p->next - sp;
1215e5996857STim J. Robbins 	if (len >= sizeof(clname) - 1) {
121658f0484fSRodney W. Grimes 		SETERROR(REG_ECTYPE);
121758f0484fSRodney W. Grimes 		return;
121858f0484fSRodney W. Grimes 	}
1219e5996857STim J. Robbins 	memcpy(clname, sp, len);
1220e5996857STim J. Robbins 	clname[len] = '\0';
122118a1e2e9SKyle Evans 
122218a1e2e9SKyle Evans 	p_b_cclass_named(p, cs, clname);
122318a1e2e9SKyle Evans }
122418a1e2e9SKyle Evans /*
122518a1e2e9SKyle Evans  - p_b_cclass_named - deal with a named character class
122618a1e2e9SKyle Evans  == static void p_b_cclass_named(struct parse *p, cset *cs, const char []);
122718a1e2e9SKyle Evans  */
122818a1e2e9SKyle Evans static void
122918a1e2e9SKyle Evans p_b_cclass_named(struct parse *p, cset *cs, const char clname[]) {
123018a1e2e9SKyle Evans 	wctype_t wct;
123118a1e2e9SKyle Evans 
1232e5996857STim J. Robbins 	if ((wct = wctype(clname)) == 0) {
1233e5996857STim J. Robbins 		SETERROR(REG_ECTYPE);
1234e5996857STim J. Robbins 		return;
1235b5363c4aSAndrey A. Chernov 	}
1236e5996857STim J. Robbins 	CHaddtype(p, cs, wct);
123758f0484fSRodney W. Grimes }
123858f0484fSRodney W. Grimes 
123958f0484fSRodney W. Grimes /*
124058f0484fSRodney W. Grimes  - p_b_eclass - parse an equivalence-class name and deal with it
12418fb3f3f6SDavid E. O'Brien  == static void p_b_eclass(struct parse *p, cset *cs);
124258f0484fSRodney W. Grimes  *
124358f0484fSRodney W. Grimes  * This implementation is incomplete. xxx
124458f0484fSRodney W. Grimes  */
124558f0484fSRodney W. Grimes static void
124654a648d1SXin LI p_b_eclass(struct parse *p, cset *cs)
124758f0484fSRodney W. Grimes {
1248e5996857STim J. Robbins 	wint_t c;
124958f0484fSRodney W. Grimes 
125058f0484fSRodney W. Grimes 	c = p_b_coll_elem(p, '=');
1251e5996857STim J. Robbins 	CHadd(p, cs, c);
125258f0484fSRodney W. Grimes }
125358f0484fSRodney W. Grimes 
125458f0484fSRodney W. Grimes /*
125558f0484fSRodney W. Grimes  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
12562bf213ebSKevin Lo  == static wint_t p_b_symbol(struct parse *p);
125758f0484fSRodney W. Grimes  */
1258e5996857STim J. Robbins static wint_t			/* value of symbol */
125954a648d1SXin LI p_b_symbol(struct parse *p)
126058f0484fSRodney W. Grimes {
1261e5996857STim J. Robbins 	wint_t value;
126258f0484fSRodney W. Grimes 
126351295a4dSJordan K. Hubbard 	(void)REQUIRE(MORE(), REG_EBRACK);
126458f0484fSRodney W. Grimes 	if (!EATTWO('[', '.'))
1265e5996857STim J. Robbins 		return(WGETNEXT());
126658f0484fSRodney W. Grimes 
126758f0484fSRodney W. Grimes 	/* collating symbol */
126858f0484fSRodney W. Grimes 	value = p_b_coll_elem(p, '.');
126951295a4dSJordan K. Hubbard 	(void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
127058f0484fSRodney W. Grimes 	return(value);
127158f0484fSRodney W. Grimes }
127258f0484fSRodney W. Grimes 
127358f0484fSRodney W. Grimes /*
127458f0484fSRodney W. Grimes  - p_b_coll_elem - parse a collating-element name and look it up
12752bf213ebSKevin Lo  == static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
127658f0484fSRodney W. Grimes  */
1277e5996857STim J. Robbins static wint_t			/* value of collating element */
127854a648d1SXin LI p_b_coll_elem(struct parse *p,
127954a648d1SXin LI 	wint_t endc)		/* name ended by endc,']' */
128058f0484fSRodney W. Grimes {
12818d0f9a93SPedro F. Giffuni 	const char *sp = p->next;
12828fb3f3f6SDavid E. O'Brien 	struct cname *cp;
1283e5996857STim J. Robbins 	mbstate_t mbs;
1284e5996857STim J. Robbins 	wchar_t wc;
12858d0f9a93SPedro F. Giffuni 	size_t clen, len;
128658f0484fSRodney W. Grimes 
128758f0484fSRodney W. Grimes 	while (MORE() && !SEETWO(endc, ']'))
128858f0484fSRodney W. Grimes 		NEXT();
128958f0484fSRodney W. Grimes 	if (!MORE()) {
129058f0484fSRodney W. Grimes 		SETERROR(REG_EBRACK);
129158f0484fSRodney W. Grimes 		return(0);
129258f0484fSRodney W. Grimes 	}
129358f0484fSRodney W. Grimes 	len = p->next - sp;
129458f0484fSRodney W. Grimes 	for (cp = cnames; cp->name != NULL; cp++)
12950f23ab8aSPedro F. Giffuni 		if (strncmp(cp->name, sp, len) == 0 && strlen(cp->name) == len)
129658f0484fSRodney W. Grimes 			return(cp->code);	/* known name */
1297e5996857STim J. Robbins 	memset(&mbs, 0, sizeof(mbs));
1298e5996857STim J. Robbins 	if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len)
1299e5996857STim J. Robbins 		return (wc);			/* single character */
1300e5996857STim J. Robbins 	else if (clen == (size_t)-1 || clen == (size_t)-2)
1301e5996857STim J. Robbins 		SETERROR(REG_ILLSEQ);
1302e5996857STim J. Robbins 	else
130358f0484fSRodney W. Grimes 		SETERROR(REG_ECOLLATE);		/* neither */
130458f0484fSRodney W. Grimes 	return(0);
130558f0484fSRodney W. Grimes }
130658f0484fSRodney W. Grimes 
130758f0484fSRodney W. Grimes /*
1308adeebf4cSKyle Evans  - may_escape - determine whether 'ch' is escape-able in the current context
1309adeebf4cSKyle Evans  == static int may_escape(struct parse *p, const wint_t ch)
1310adeebf4cSKyle Evans  */
1311adeebf4cSKyle Evans static bool
1312adeebf4cSKyle Evans may_escape(struct parse *p, const wint_t ch)
1313adeebf4cSKyle Evans {
1314adeebf4cSKyle Evans 
1315adeebf4cSKyle Evans 	if ((p->pflags & PFLAG_LEGACY_ESC) != 0)
1316adeebf4cSKyle Evans 		return (true);
13173fb80f14SChristos Zoulas 	if (iswalpha(ch) || ch == '\'' || ch == '`')
1318adeebf4cSKyle Evans 		return (false);
1319adeebf4cSKyle Evans 	return (true);
1320adeebf4cSKyle Evans #ifdef NOTYET
1321adeebf4cSKyle Evans 	/*
1322adeebf4cSKyle Evans 	 * Build a whitelist of characters that may be escaped to produce an
1323adeebf4cSKyle Evans 	 * ordinary in the current context. This assumes that these have not
1324adeebf4cSKyle Evans 	 * been otherwise interpreted as a special character. Escaping an
1325adeebf4cSKyle Evans 	 * ordinary character yields undefined results according to
1326adeebf4cSKyle Evans 	 * IEEE 1003.1-2008. Some extensions (notably, some GNU extensions) take
1327adeebf4cSKyle Evans 	 * advantage of this and use escaped ordinary characters to provide
1328adeebf4cSKyle Evans 	 * special meaning, e.g. \b, \B, \w, \W, \s, \S.
1329adeebf4cSKyle Evans 	 */
1330adeebf4cSKyle Evans 	switch(ch) {
1331adeebf4cSKyle Evans 	case '|':
1332adeebf4cSKyle Evans 	case '+':
1333adeebf4cSKyle Evans 	case '?':
1334adeebf4cSKyle Evans 		/* The above characters may not be escaped in BREs */
1335adeebf4cSKyle Evans 		if (!(p->g->cflags&REG_EXTENDED))
1336adeebf4cSKyle Evans 			return (false);
1337adeebf4cSKyle Evans 		/* Fallthrough */
1338adeebf4cSKyle Evans 	case '(':
1339adeebf4cSKyle Evans 	case ')':
1340adeebf4cSKyle Evans 	case '{':
1341adeebf4cSKyle Evans 	case '}':
1342adeebf4cSKyle Evans 	case '.':
1343adeebf4cSKyle Evans 	case '[':
1344adeebf4cSKyle Evans 	case ']':
1345adeebf4cSKyle Evans 	case '\\':
1346adeebf4cSKyle Evans 	case '*':
1347adeebf4cSKyle Evans 	case '^':
1348adeebf4cSKyle Evans 	case '$':
1349adeebf4cSKyle Evans 		return (true);
1350adeebf4cSKyle Evans 	default:
1351adeebf4cSKyle Evans 		return (false);
1352adeebf4cSKyle Evans 	}
1353adeebf4cSKyle Evans #endif
1354adeebf4cSKyle Evans }
1355adeebf4cSKyle Evans 
1356adeebf4cSKyle Evans /*
135758f0484fSRodney W. Grimes  - othercase - return the case counterpart of an alphabetic
13582bf213ebSKevin Lo  == static wint_t othercase(wint_t ch);
135958f0484fSRodney W. Grimes  */
1360e5996857STim J. Robbins static wint_t			/* if no counterpart, return ch */
136154a648d1SXin LI othercase(wint_t ch)
136258f0484fSRodney W. Grimes {
1363e5996857STim J. Robbins 	assert(iswalpha(ch));
1364e5996857STim J. Robbins 	if (iswupper(ch))
1365e5996857STim J. Robbins 		return(towlower(ch));
1366e5996857STim J. Robbins 	else if (iswlower(ch))
1367e5996857STim J. Robbins 		return(towupper(ch));
136858f0484fSRodney W. Grimes 	else			/* peculiar, but could happen */
136958f0484fSRodney W. Grimes 		return(ch);
137058f0484fSRodney W. Grimes }
137158f0484fSRodney W. Grimes 
137258f0484fSRodney W. Grimes /*
137358f0484fSRodney W. Grimes  - bothcases - emit a dualcase version of a two-case character
13742bf213ebSKevin Lo  == static void bothcases(struct parse *p, wint_t ch);
137558f0484fSRodney W. Grimes  *
137658f0484fSRodney W. Grimes  * Boy, is this implementation ever a kludge...
137758f0484fSRodney W. Grimes  */
137858f0484fSRodney W. Grimes static void
137954a648d1SXin LI bothcases(struct parse *p, wint_t ch)
138058f0484fSRodney W. Grimes {
13818d0f9a93SPedro F. Giffuni 	const char *oldnext = p->next;
13828d0f9a93SPedro F. Giffuni 	const char *oldend = p->end;
1383e5996857STim J. Robbins 	char bracket[3 + MB_LEN_MAX];
1384e5996857STim J. Robbins 	size_t n;
1385e5996857STim J. Robbins 	mbstate_t mbs;
138658f0484fSRodney W. Grimes 
138758f0484fSRodney W. Grimes 	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
138858f0484fSRodney W. Grimes 	p->next = bracket;
1389e5996857STim J. Robbins 	memset(&mbs, 0, sizeof(mbs));
1390e5996857STim J. Robbins 	n = wcrtomb(bracket, ch, &mbs);
1391e5996857STim J. Robbins 	assert(n != (size_t)-1);
1392e5996857STim J. Robbins 	bracket[n] = ']';
1393e5996857STim J. Robbins 	bracket[n + 1] = '\0';
1394e5996857STim J. Robbins 	p->end = bracket+n+1;
139558f0484fSRodney W. Grimes 	p_bracket(p);
1396e5996857STim J. Robbins 	assert(p->next == p->end);
139758f0484fSRodney W. Grimes 	p->next = oldnext;
139858f0484fSRodney W. Grimes 	p->end = oldend;
139958f0484fSRodney W. Grimes }
140058f0484fSRodney W. Grimes 
140158f0484fSRodney W. Grimes /*
140258f0484fSRodney W. Grimes  - ordinary - emit an ordinary character
14032bf213ebSKevin Lo  == static void ordinary(struct parse *p, wint_t ch);
140458f0484fSRodney W. Grimes  */
140558f0484fSRodney W. Grimes static void
140654a648d1SXin LI ordinary(struct parse *p, wint_t ch)
140758f0484fSRodney W. Grimes {
1408e5996857STim J. Robbins 	cset *cs;
140958f0484fSRodney W. Grimes 
1410e5996857STim J. Robbins 	if ((p->g->cflags&REG_ICASE) && iswalpha(ch) && othercase(ch) != ch)
141158f0484fSRodney W. Grimes 		bothcases(p, ch);
1412e5996857STim J. Robbins 	else if ((ch & OPDMASK) == ch)
1413e5996857STim J. Robbins 		EMIT(OCHAR, ch);
1414e5996857STim J. Robbins 	else {
1415e5996857STim J. Robbins 		/*
1416e5996857STim J. Robbins 		 * Kludge: character is too big to fit into an OCHAR operand.
1417e5996857STim J. Robbins 		 * Emit a singleton set.
1418e5996857STim J. Robbins 		 */
1419e5996857STim J. Robbins 		if ((cs = allocset(p)) == NULL)
1420e5996857STim J. Robbins 			return;
1421e5996857STim J. Robbins 		CHadd(p, cs, ch);
1422e5996857STim J. Robbins 		EMIT(OANYOF, (int)(cs - p->g->sets));
1423e5996857STim J. Robbins 	}
142458f0484fSRodney W. Grimes }
142558f0484fSRodney W. Grimes 
142658f0484fSRodney W. Grimes /*
142758f0484fSRodney W. Grimes  - nonnewline - emit REG_NEWLINE version of OANY
14288fb3f3f6SDavid E. O'Brien  == static void nonnewline(struct parse *p);
142958f0484fSRodney W. Grimes  *
143058f0484fSRodney W. Grimes  * Boy, is this implementation ever a kludge...
143158f0484fSRodney W. Grimes  */
143258f0484fSRodney W. Grimes static void
143354a648d1SXin LI nonnewline(struct parse *p)
143458f0484fSRodney W. Grimes {
14358d0f9a93SPedro F. Giffuni 	const char *oldnext = p->next;
14368d0f9a93SPedro F. Giffuni 	const char *oldend = p->end;
143758f0484fSRodney W. Grimes 	char bracket[4];
143858f0484fSRodney W. Grimes 
143958f0484fSRodney W. Grimes 	p->next = bracket;
144058f0484fSRodney W. Grimes 	p->end = bracket+3;
144158f0484fSRodney W. Grimes 	bracket[0] = '^';
144258f0484fSRodney W. Grimes 	bracket[1] = '\n';
144358f0484fSRodney W. Grimes 	bracket[2] = ']';
144458f0484fSRodney W. Grimes 	bracket[3] = '\0';
144558f0484fSRodney W. Grimes 	p_bracket(p);
144658f0484fSRodney W. Grimes 	assert(p->next == bracket+3);
144758f0484fSRodney W. Grimes 	p->next = oldnext;
144858f0484fSRodney W. Grimes 	p->end = oldend;
144958f0484fSRodney W. Grimes }
145058f0484fSRodney W. Grimes 
145158f0484fSRodney W. Grimes /*
145258f0484fSRodney W. Grimes  - repeat - generate code for a bounded repetition, recursively if needed
14538fb3f3f6SDavid E. O'Brien  == static void repeat(struct parse *p, sopno start, int from, int to);
145458f0484fSRodney W. Grimes  */
145558f0484fSRodney W. Grimes static void
145654a648d1SXin LI repeat(struct parse *p,
145754a648d1SXin LI 	sopno start,		/* operand from here to end of strip */
145854a648d1SXin LI 	int from,		/* repeated from this number */
145954a648d1SXin LI 	int to)			/* to this number of times (maybe INFINITY) */
146058f0484fSRodney W. Grimes {
14618fb3f3f6SDavid E. O'Brien 	sopno finish = HERE();
146258f0484fSRodney W. Grimes #	define	N	2
146358f0484fSRodney W. Grimes #	define	INF	3
146458f0484fSRodney W. Grimes #	define	REP(f, t)	((f)*8 + (t))
146558f0484fSRodney W. Grimes #	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
14668fb3f3f6SDavid E. O'Brien 	sopno copy;
146758f0484fSRodney W. Grimes 
146858f0484fSRodney W. Grimes 	if (p->error != 0)	/* head off possible runaway recursion */
146958f0484fSRodney W. Grimes 		return;
147058f0484fSRodney W. Grimes 
147158f0484fSRodney W. Grimes 	assert(from <= to);
147258f0484fSRodney W. Grimes 
147358f0484fSRodney W. Grimes 	switch (REP(MAP(from), MAP(to))) {
147458f0484fSRodney W. Grimes 	case REP(0, 0):			/* must be user doing this */
147558f0484fSRodney W. Grimes 		DROP(finish-start);	/* drop the operand */
147658f0484fSRodney W. Grimes 		break;
147758f0484fSRodney W. Grimes 	case REP(0, 1):			/* as x{1,1}? */
147858f0484fSRodney W. Grimes 	case REP(0, N):			/* as x{1,n}? */
147958f0484fSRodney W. Grimes 	case REP(0, INF):		/* as x{1,}? */
148058f0484fSRodney W. Grimes 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
148158f0484fSRodney W. Grimes 		INSERT(OCH_, start);		/* offset is wrong... */
148258f0484fSRodney W. Grimes 		repeat(p, start+1, 1, to);
148358f0484fSRodney W. Grimes 		ASTERN(OOR1, start);
148458f0484fSRodney W. Grimes 		AHEAD(start);			/* ... fix it */
148558f0484fSRodney W. Grimes 		EMIT(OOR2, 0);
148658f0484fSRodney W. Grimes 		AHEAD(THERE());
148758f0484fSRodney W. Grimes 		ASTERN(O_CH, THERETHERE());
148858f0484fSRodney W. Grimes 		break;
148958f0484fSRodney W. Grimes 	case REP(1, 1):			/* trivial case */
149058f0484fSRodney W. Grimes 		/* done */
149158f0484fSRodney W. Grimes 		break;
149258f0484fSRodney W. Grimes 	case REP(1, N):			/* as x?x{1,n-1} */
149358f0484fSRodney W. Grimes 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
149458f0484fSRodney W. Grimes 		INSERT(OCH_, start);
149558f0484fSRodney W. Grimes 		ASTERN(OOR1, start);
149658f0484fSRodney W. Grimes 		AHEAD(start);
149758f0484fSRodney W. Grimes 		EMIT(OOR2, 0);			/* offset very wrong... */
149858f0484fSRodney W. Grimes 		AHEAD(THERE());			/* ...so fix it */
149958f0484fSRodney W. Grimes 		ASTERN(O_CH, THERETHERE());
150058f0484fSRodney W. Grimes 		copy = dupl(p, start+1, finish+1);
150158f0484fSRodney W. Grimes 		assert(copy == finish+4);
150258f0484fSRodney W. Grimes 		repeat(p, copy, 1, to-1);
150358f0484fSRodney W. Grimes 		break;
150458f0484fSRodney W. Grimes 	case REP(1, INF):		/* as x+ */
150558f0484fSRodney W. Grimes 		INSERT(OPLUS_, start);
150658f0484fSRodney W. Grimes 		ASTERN(O_PLUS, start);
150758f0484fSRodney W. Grimes 		break;
150858f0484fSRodney W. Grimes 	case REP(N, N):			/* as xx{m-1,n-1} */
150958f0484fSRodney W. Grimes 		copy = dupl(p, start, finish);
151058f0484fSRodney W. Grimes 		repeat(p, copy, from-1, to-1);
151158f0484fSRodney W. Grimes 		break;
151258f0484fSRodney W. Grimes 	case REP(N, INF):		/* as xx{n-1,INF} */
151358f0484fSRodney W. Grimes 		copy = dupl(p, start, finish);
151458f0484fSRodney W. Grimes 		repeat(p, copy, from-1, to);
151558f0484fSRodney W. Grimes 		break;
151658f0484fSRodney W. Grimes 	default:			/* "can't happen" */
151758f0484fSRodney W. Grimes 		SETERROR(REG_ASSERT);	/* just in case */
151858f0484fSRodney W. Grimes 		break;
151958f0484fSRodney W. Grimes 	}
152058f0484fSRodney W. Grimes }
152158f0484fSRodney W. Grimes 
152258f0484fSRodney W. Grimes /*
1523e5996857STim J. Robbins  - wgetnext - helper function for WGETNEXT() macro. Gets the next wide
1524e5996857STim J. Robbins  - character from the parse struct, signals a REG_ILLSEQ error if the
1525e5996857STim J. Robbins  - character can't be converted. Returns the number of bytes consumed.
1526e5996857STim J. Robbins  */
1527e5996857STim J. Robbins static wint_t
152854a648d1SXin LI wgetnext(struct parse *p)
1529e5996857STim J. Robbins {
1530e5996857STim J. Robbins 	mbstate_t mbs;
1531e5996857STim J. Robbins 	wchar_t wc;
1532e5996857STim J. Robbins 	size_t n;
1533e5996857STim J. Robbins 
1534e5996857STim J. Robbins 	memset(&mbs, 0, sizeof(mbs));
1535e5996857STim J. Robbins 	n = mbrtowc(&wc, p->next, p->end - p->next, &mbs);
1536e5996857STim J. Robbins 	if (n == (size_t)-1 || n == (size_t)-2) {
1537e5996857STim J. Robbins 		SETERROR(REG_ILLSEQ);
1538e5996857STim J. Robbins 		return (0);
1539e5996857STim J. Robbins 	}
1540e5996857STim J. Robbins 	if (n == 0)
1541e5996857STim J. Robbins 		n = 1;
1542e5996857STim J. Robbins 	p->next += n;
1543e5996857STim J. Robbins 	return (wc);
1544e5996857STim J. Robbins }
1545e5996857STim J. Robbins 
1546e5996857STim J. Robbins /*
154758f0484fSRodney W. Grimes  - seterr - set an error condition
15488fb3f3f6SDavid E. O'Brien  == static int seterr(struct parse *p, int e);
154958f0484fSRodney W. Grimes  */
155058f0484fSRodney W. Grimes static int			/* useless but makes type checking happy */
155154a648d1SXin LI seterr(struct parse *p, int e)
155258f0484fSRodney W. Grimes {
155358f0484fSRodney W. Grimes 	if (p->error == 0)	/* keep earliest error condition */
155458f0484fSRodney W. Grimes 		p->error = e;
155558f0484fSRodney W. Grimes 	p->next = nuls;		/* try to bring things to a halt */
155658f0484fSRodney W. Grimes 	p->end = nuls;
155758f0484fSRodney W. Grimes 	return(0);		/* make the return value well-defined */
155858f0484fSRodney W. Grimes }
155958f0484fSRodney W. Grimes 
156058f0484fSRodney W. Grimes /*
156158f0484fSRodney W. Grimes  - allocset - allocate a set of characters for []
15628fb3f3f6SDavid E. O'Brien  == static cset *allocset(struct parse *p);
156358f0484fSRodney W. Grimes  */
156458f0484fSRodney W. Grimes static cset *
156554a648d1SXin LI allocset(struct parse *p)
156658f0484fSRodney W. Grimes {
1567e5996857STim J. Robbins 	cset *cs, *ncs;
156858f0484fSRodney W. Grimes 
15699f36610fSPedro F. Giffuni 	ncs = reallocarray(p->g->sets, p->g->ncsets + 1, sizeof(*ncs));
1570e5996857STim J. Robbins 	if (ncs == NULL) {
157158f0484fSRodney W. Grimes 		SETERROR(REG_ESPACE);
1572e5996857STim J. Robbins 		return (NULL);
157358f0484fSRodney W. Grimes 	}
1574e5996857STim J. Robbins 	p->g->sets = ncs;
1575e5996857STim J. Robbins 	cs = &p->g->sets[p->g->ncsets++];
1576e5996857STim J. Robbins 	memset(cs, 0, sizeof(*cs));
157758f0484fSRodney W. Grimes 
157858f0484fSRodney W. Grimes 	return(cs);
157958f0484fSRodney W. Grimes }
158058f0484fSRodney W. Grimes 
158158f0484fSRodney W. Grimes /*
158258f0484fSRodney W. Grimes  - freeset - free a now-unused set
15838fb3f3f6SDavid E. O'Brien  == static void freeset(struct parse *p, cset *cs);
158458f0484fSRodney W. Grimes  */
158558f0484fSRodney W. Grimes static void
158654a648d1SXin LI freeset(struct parse *p, cset *cs)
158758f0484fSRodney W. Grimes {
15888fb3f3f6SDavid E. O'Brien 	cset *top = &p->g->sets[p->g->ncsets];
158958f0484fSRodney W. Grimes 
1590e5996857STim J. Robbins 	free(cs->wides);
1591e5996857STim J. Robbins 	free(cs->ranges);
1592e5996857STim J. Robbins 	free(cs->types);
1593e5996857STim J. Robbins 	memset(cs, 0, sizeof(*cs));
159458f0484fSRodney W. Grimes 	if (cs == top-1)	/* recover only the easy case */
159558f0484fSRodney W. Grimes 		p->g->ncsets--;
159658f0484fSRodney W. Grimes }
159758f0484fSRodney W. Grimes 
159858f0484fSRodney W. Grimes /*
1599e5996857STim J. Robbins  - singleton - Determine whether a set contains only one character,
1600e5996857STim J. Robbins  - returning it if so, otherwise returning OUT.
160158f0484fSRodney W. Grimes  */
1602e5996857STim J. Robbins static wint_t
160354a648d1SXin LI singleton(cset *cs)
160458f0484fSRodney W. Grimes {
1605e5996857STim J. Robbins 	wint_t i, s, n;
160658f0484fSRodney W. Grimes 
16078f7ed58aSBill Sommerfeld 	/* Exclude the complicated cases we don't want to deal with */
16088f7ed58aSBill Sommerfeld 	if (cs->nranges != 0 || cs->ntypes != 0 || cs->icase != 0)
16098f7ed58aSBill Sommerfeld 		return (OUT);
16108f7ed58aSBill Sommerfeld 
16118f7ed58aSBill Sommerfeld 	if (cs->nwides > 1)
16128f7ed58aSBill Sommerfeld 		return (OUT);
16138f7ed58aSBill Sommerfeld 
16148f7ed58aSBill Sommerfeld 	/* Count the number of characters present in the bitmap */
1615e5996857STim J. Robbins 	for (i = n = 0; i < NC; i++)
1616e5996857STim J. Robbins 		if (CHIN(cs, i)) {
161758f0484fSRodney W. Grimes 			n++;
1618e5996857STim J. Robbins 			s = i;
1619e5996857STim J. Robbins 		}
16208f7ed58aSBill Sommerfeld 
16218f7ed58aSBill Sommerfeld 	if (n > 1)
16228f7ed58aSBill Sommerfeld 		return (OUT);
16238f7ed58aSBill Sommerfeld 
16248f7ed58aSBill Sommerfeld 	if (n == 1) {
16258f7ed58aSBill Sommerfeld 		if (cs->nwides == 0)
1626e5996857STim J. Robbins 			return (s);
16278f7ed58aSBill Sommerfeld 		else
16288f7ed58aSBill Sommerfeld 			return (OUT);
16298f7ed58aSBill Sommerfeld 	}
16308f7ed58aSBill Sommerfeld 	if (cs->nwides == 1)
1631e5996857STim J. Robbins 		return (cs->wides[0]);
16328f7ed58aSBill Sommerfeld 
1633e5996857STim J. Robbins 	return (OUT);
1634e5996857STim J. Robbins }
1635e5996857STim J. Robbins 
1636e5996857STim J. Robbins /*
1637e5996857STim J. Robbins  - CHadd - add character to character set.
1638e5996857STim J. Robbins  */
1639e5996857STim J. Robbins static void
164054a648d1SXin LI CHadd(struct parse *p, cset *cs, wint_t ch)
1641e5996857STim J. Robbins {
164233176f17STim J. Robbins 	wint_t nch, *newwides;
1643e5996857STim J. Robbins 	assert(ch >= 0);
164433176f17STim J. Robbins 	if (ch < NC)
1645e5996857STim J. Robbins 		cs->bmp[ch >> 3] |= 1 << (ch & 7);
164633176f17STim J. Robbins 	else {
16479f36610fSPedro F. Giffuni 		newwides = reallocarray(cs->wides, cs->nwides + 1,
1648e5996857STim J. Robbins 		    sizeof(*cs->wides));
1649e5996857STim J. Robbins 		if (newwides == NULL) {
1650e5996857STim J. Robbins 			SETERROR(REG_ESPACE);
1651e5996857STim J. Robbins 			return;
1652e5996857STim J. Robbins 		}
1653e5996857STim J. Robbins 		cs->wides = newwides;
1654e5996857STim J. Robbins 		cs->wides[cs->nwides++] = ch;
1655e5996857STim J. Robbins 	}
165633176f17STim J. Robbins 	if (cs->icase) {
165733176f17STim J. Robbins 		if ((nch = towlower(ch)) < NC)
165833176f17STim J. Robbins 			cs->bmp[nch >> 3] |= 1 << (nch & 7);
165933176f17STim J. Robbins 		if ((nch = towupper(ch)) < NC)
166033176f17STim J. Robbins 			cs->bmp[nch >> 3] |= 1 << (nch & 7);
166133176f17STim J. Robbins 	}
1662e5996857STim J. Robbins }
1663e5996857STim J. Robbins 
1664e5996857STim J. Robbins /*
1665e5996857STim J. Robbins  - CHaddrange - add all characters in the range [min,max] to a character set.
1666e5996857STim J. Robbins  */
1667e5996857STim J. Robbins static void
166854a648d1SXin LI CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max)
1669e5996857STim J. Robbins {
1670e5996857STim J. Robbins 	crange *newranges;
1671e5996857STim J. Robbins 
1672e5996857STim J. Robbins 	for (; min < NC && min <= max; min++)
1673e5996857STim J. Robbins 		CHadd(p, cs, min);
1674e5996857STim J. Robbins 	if (min >= max)
1675e5996857STim J. Robbins 		return;
16769f36610fSPedro F. Giffuni 	newranges = reallocarray(cs->ranges, cs->nranges + 1,
1677e5996857STim J. Robbins 	    sizeof(*cs->ranges));
1678e5996857STim J. Robbins 	if (newranges == NULL) {
1679e5996857STim J. Robbins 		SETERROR(REG_ESPACE);
1680e5996857STim J. Robbins 		return;
1681e5996857STim J. Robbins 	}
1682e5996857STim J. Robbins 	cs->ranges = newranges;
1683e5996857STim J. Robbins 	cs->ranges[cs->nranges].min = min;
1684d0ebccdeSXin LI 	cs->ranges[cs->nranges].max = max;
1685e5996857STim J. Robbins 	cs->nranges++;
1686e5996857STim J. Robbins }
1687e5996857STim J. Robbins 
1688e5996857STim J. Robbins /*
1689e5996857STim J. Robbins  - CHaddtype - add all characters of a certain type to a character set.
1690e5996857STim J. Robbins  */
1691e5996857STim J. Robbins static void
169254a648d1SXin LI CHaddtype(struct parse *p, cset *cs, wctype_t wct)
1693e5996857STim J. Robbins {
1694e5996857STim J. Robbins 	wint_t i;
1695e5996857STim J. Robbins 	wctype_t *newtypes;
1696e5996857STim J. Robbins 
169733176f17STim J. Robbins 	for (i = 0; i < NC; i++)
1698e5996857STim J. Robbins 		if (iswctype(i, wct))
1699e5996857STim J. Robbins 			CHadd(p, cs, i);
17009f36610fSPedro F. Giffuni 	newtypes = reallocarray(cs->types, cs->ntypes + 1,
1701e5996857STim J. Robbins 	    sizeof(*cs->types));
1702e5996857STim J. Robbins 	if (newtypes == NULL) {
1703e5996857STim J. Robbins 		SETERROR(REG_ESPACE);
1704e5996857STim J. Robbins 		return;
1705e5996857STim J. Robbins 	}
1706e5996857STim J. Robbins 	cs->types = newtypes;
1707e5996857STim J. Robbins 	cs->types[cs->ntypes++] = wct;
170858f0484fSRodney W. Grimes }
170958f0484fSRodney W. Grimes 
171058f0484fSRodney W. Grimes /*
171158f0484fSRodney W. Grimes  - dupl - emit a duplicate of a bunch of sops
17128fb3f3f6SDavid E. O'Brien  == static sopno dupl(struct parse *p, sopno start, sopno finish);
171358f0484fSRodney W. Grimes  */
171458f0484fSRodney W. Grimes static sopno			/* start of duplicate */
171554a648d1SXin LI dupl(struct parse *p,
171654a648d1SXin LI 	sopno start,		/* from here */
171754a648d1SXin LI 	sopno finish)		/* to this less one */
171858f0484fSRodney W. Grimes {
17198fb3f3f6SDavid E. O'Brien 	sopno ret = HERE();
17208fb3f3f6SDavid E. O'Brien 	sopno len = finish - start;
172158f0484fSRodney W. Grimes 
172258f0484fSRodney W. Grimes 	assert(finish >= start);
172358f0484fSRodney W. Grimes 	if (len == 0)
172458f0484fSRodney W. Grimes 		return(ret);
17252bf213ebSKevin Lo 	if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */
17262bf213ebSKevin Lo 		return(ret);
172758f0484fSRodney W. Grimes 	(void) memcpy((char *)(p->strip + p->slen),
172858f0484fSRodney W. Grimes 		(char *)(p->strip + start), (size_t)len*sizeof(sop));
172958f0484fSRodney W. Grimes 	p->slen += len;
173058f0484fSRodney W. Grimes 	return(ret);
173158f0484fSRodney W. Grimes }
173258f0484fSRodney W. Grimes 
173358f0484fSRodney W. Grimes /*
173458f0484fSRodney W. Grimes  - doemit - emit a strip operator
17358fb3f3f6SDavid E. O'Brien  == static void doemit(struct parse *p, sop op, size_t opnd);
173658f0484fSRodney W. Grimes  *
173758f0484fSRodney W. Grimes  * It might seem better to implement this as a macro with a function as
173858f0484fSRodney W. Grimes  * hard-case backup, but it's just too big and messy unless there are
173958f0484fSRodney W. Grimes  * some changes to the data structures.  Maybe later.
174058f0484fSRodney W. Grimes  */
174158f0484fSRodney W. Grimes static void
174254a648d1SXin LI doemit(struct parse *p, sop op, size_t opnd)
174358f0484fSRodney W. Grimes {
174458f0484fSRodney W. Grimes 	/* avoid making error situations worse */
174558f0484fSRodney W. Grimes 	if (p->error != 0)
174658f0484fSRodney W. Grimes 		return;
174758f0484fSRodney W. Grimes 
174858f0484fSRodney W. Grimes 	/* deal with oversize operands ("can't happen", more or less) */
174958f0484fSRodney W. Grimes 	assert(opnd < 1<<OPSHIFT);
175058f0484fSRodney W. Grimes 
175158f0484fSRodney W. Grimes 	/* deal with undersized strip */
175258f0484fSRodney W. Grimes 	if (p->slen >= p->ssize)
17532bf213ebSKevin Lo 		if (!enlarge(p, (p->ssize+1) / 2 * 3))	/* +50% */
17542bf213ebSKevin Lo 			return;
175558f0484fSRodney W. Grimes 
175658f0484fSRodney W. Grimes 	/* finally, it's all reduced to the easy case */
175758f0484fSRodney W. Grimes 	p->strip[p->slen++] = SOP(op, opnd);
175858f0484fSRodney W. Grimes }
175958f0484fSRodney W. Grimes 
176058f0484fSRodney W. Grimes /*
176158f0484fSRodney W. Grimes  - doinsert - insert a sop into the strip
17628fb3f3f6SDavid E. O'Brien  == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
176358f0484fSRodney W. Grimes  */
176458f0484fSRodney W. Grimes static void
176554a648d1SXin LI doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
176658f0484fSRodney W. Grimes {
17678fb3f3f6SDavid E. O'Brien 	sopno sn;
17688fb3f3f6SDavid E. O'Brien 	sop s;
17698fb3f3f6SDavid E. O'Brien 	int i;
177058f0484fSRodney W. Grimes 
177158f0484fSRodney W. Grimes 	/* avoid making error situations worse */
177258f0484fSRodney W. Grimes 	if (p->error != 0)
177358f0484fSRodney W. Grimes 		return;
177458f0484fSRodney W. Grimes 
177558f0484fSRodney W. Grimes 	sn = HERE();
177658f0484fSRodney W. Grimes 	EMIT(op, opnd);		/* do checks, ensure space */
177758f0484fSRodney W. Grimes 	assert(HERE() == sn+1);
177858f0484fSRodney W. Grimes 	s = p->strip[sn];
177958f0484fSRodney W. Grimes 
178058f0484fSRodney W. Grimes 	/* adjust paren pointers */
178158f0484fSRodney W. Grimes 	assert(pos > 0);
178258f0484fSRodney W. Grimes 	for (i = 1; i < NPAREN; i++) {
178358f0484fSRodney W. Grimes 		if (p->pbegin[i] >= pos) {
178458f0484fSRodney W. Grimes 			p->pbegin[i]++;
178558f0484fSRodney W. Grimes 		}
178658f0484fSRodney W. Grimes 		if (p->pend[i] >= pos) {
178758f0484fSRodney W. Grimes 			p->pend[i]++;
178858f0484fSRodney W. Grimes 		}
178958f0484fSRodney W. Grimes 	}
179058f0484fSRodney W. Grimes 
179158f0484fSRodney W. Grimes 	memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
179258f0484fSRodney W. Grimes 						(HERE()-pos-1)*sizeof(sop));
179358f0484fSRodney W. Grimes 	p->strip[pos] = s;
179458f0484fSRodney W. Grimes }
179558f0484fSRodney W. Grimes 
179658f0484fSRodney W. Grimes /*
179758f0484fSRodney W. Grimes  - dofwd - complete a forward reference
17988fb3f3f6SDavid E. O'Brien  == static void dofwd(struct parse *p, sopno pos, sop value);
179958f0484fSRodney W. Grimes  */
180058f0484fSRodney W. Grimes static void
180154a648d1SXin LI dofwd(struct parse *p, sopno pos, sop value)
180258f0484fSRodney W. Grimes {
180358f0484fSRodney W. Grimes 	/* avoid making error situations worse */
180458f0484fSRodney W. Grimes 	if (p->error != 0)
180558f0484fSRodney W. Grimes 		return;
180658f0484fSRodney W. Grimes 
180758f0484fSRodney W. Grimes 	assert(value < 1<<OPSHIFT);
180858f0484fSRodney W. Grimes 	p->strip[pos] = OP(p->strip[pos]) | value;
180958f0484fSRodney W. Grimes }
181058f0484fSRodney W. Grimes 
181158f0484fSRodney W. Grimes /*
181258f0484fSRodney W. Grimes  - enlarge - enlarge the strip
18132bf213ebSKevin Lo  == static int enlarge(struct parse *p, sopno size);
181458f0484fSRodney W. Grimes  */
18152bf213ebSKevin Lo static int
181654a648d1SXin LI enlarge(struct parse *p, sopno size)
181758f0484fSRodney W. Grimes {
18188fb3f3f6SDavid E. O'Brien 	sop *sp;
181958f0484fSRodney W. Grimes 
182058f0484fSRodney W. Grimes 	if (p->ssize >= size)
18212bf213ebSKevin Lo 		return 1;
182258f0484fSRodney W. Grimes 
18239f36610fSPedro F. Giffuni 	sp = reallocarray(p->strip, size, sizeof(sop));
182458f0484fSRodney W. Grimes 	if (sp == NULL) {
182558f0484fSRodney W. Grimes 		SETERROR(REG_ESPACE);
18262bf213ebSKevin Lo 		return 0;
182758f0484fSRodney W. Grimes 	}
182858f0484fSRodney W. Grimes 	p->strip = sp;
182958f0484fSRodney W. Grimes 	p->ssize = size;
18302bf213ebSKevin Lo 	return 1;
183158f0484fSRodney W. Grimes }
183258f0484fSRodney W. Grimes 
183358f0484fSRodney W. Grimes /*
183458f0484fSRodney W. Grimes  - stripsnug - compact the strip
18358fb3f3f6SDavid E. O'Brien  == static void stripsnug(struct parse *p, struct re_guts *g);
183658f0484fSRodney W. Grimes  */
183758f0484fSRodney W. Grimes static void
183854a648d1SXin LI stripsnug(struct parse *p, struct re_guts *g)
183958f0484fSRodney W. Grimes {
184058f0484fSRodney W. Grimes 	g->nstates = p->slen;
18419f36610fSPedro F. Giffuni 	g->strip = reallocarray((char *)p->strip, p->slen, sizeof(sop));
184258f0484fSRodney W. Grimes 	if (g->strip == NULL) {
184358f0484fSRodney W. Grimes 		SETERROR(REG_ESPACE);
184458f0484fSRodney W. Grimes 		g->strip = p->strip;
184558f0484fSRodney W. Grimes 	}
184658f0484fSRodney W. Grimes }
184758f0484fSRodney W. Grimes 
184858f0484fSRodney W. Grimes /*
184958f0484fSRodney W. Grimes  - findmust - fill in must and mlen with longest mandatory literal string
18508fb3f3f6SDavid E. O'Brien  == static void findmust(struct parse *p, struct re_guts *g);
185158f0484fSRodney W. Grimes  *
185258f0484fSRodney W. Grimes  * This algorithm could do fancy things like analyzing the operands of |
185358f0484fSRodney W. Grimes  * for common subsequences.  Someday.  This code is simple and finds most
185458f0484fSRodney W. Grimes  * of the interesting cases.
185558f0484fSRodney W. Grimes  *
185658f0484fSRodney W. Grimes  * Note that must and mlen got initialized during setup.
185758f0484fSRodney W. Grimes  */
185858f0484fSRodney W. Grimes static void
185954a648d1SXin LI findmust(struct parse *p, struct re_guts *g)
186058f0484fSRodney W. Grimes {
18618fb3f3f6SDavid E. O'Brien 	sop *scan;
1862d58abbfeSPedro F. Giffuni 	sop *start = NULL;
1863d58abbfeSPedro F. Giffuni 	sop *newstart = NULL;
18648fb3f3f6SDavid E. O'Brien 	sopno newlen;
18658fb3f3f6SDavid E. O'Brien 	sop s;
18668fb3f3f6SDavid E. O'Brien 	char *cp;
1867e6a886d8SDaniel C. Sobral 	int offset;
1868e5996857STim J. Robbins 	char buf[MB_LEN_MAX];
1869e5996857STim J. Robbins 	size_t clen;
1870e5996857STim J. Robbins 	mbstate_t mbs;
187158f0484fSRodney W. Grimes 
187258f0484fSRodney W. Grimes 	/* avoid making error situations worse */
187358f0484fSRodney W. Grimes 	if (p->error != 0)
187458f0484fSRodney W. Grimes 		return;
187558f0484fSRodney W. Grimes 
1876e5996857STim J. Robbins 	/*
1877e5996857STim J. Robbins 	 * It's not generally safe to do a ``char'' substring search on
1878e5996857STim J. Robbins 	 * multibyte character strings, but it's safe for at least
1879e5996857STim J. Robbins 	 * UTF-8 (see RFC 3629).
1880e5996857STim J. Robbins 	 */
1881e5996857STim J. Robbins 	if (MB_CUR_MAX > 1 &&
1882e5996857STim J. Robbins 	    strcmp(_CurrentRuneLocale->__encoding, "UTF-8") != 0)
1883e5996857STim J. Robbins 		return;
1884e5996857STim J. Robbins 
188558f0484fSRodney W. Grimes 	/* find the longest OCHAR sequence in strip */
188658f0484fSRodney W. Grimes 	newlen = 0;
1887e6a886d8SDaniel C. Sobral 	offset = 0;
1888e6a886d8SDaniel C. Sobral 	g->moffset = 0;
188958f0484fSRodney W. Grimes 	scan = g->strip + 1;
189058f0484fSRodney W. Grimes 	do {
189158f0484fSRodney W. Grimes 		s = *scan++;
189258f0484fSRodney W. Grimes 		switch (OP(s)) {
189358f0484fSRodney W. Grimes 		case OCHAR:		/* sequence member */
1894e5996857STim J. Robbins 			if (newlen == 0) {		/* new sequence */
1895e5996857STim J. Robbins 				memset(&mbs, 0, sizeof(mbs));
189658f0484fSRodney W. Grimes 				newstart = scan - 1;
1897e5996857STim J. Robbins 			}
1898e5996857STim J. Robbins 			clen = wcrtomb(buf, OPND(s), &mbs);
1899e5996857STim J. Robbins 			if (clen == (size_t)-1)
1900e5996857STim J. Robbins 				goto toohard;
1901e5996857STim J. Robbins 			newlen += clen;
190258f0484fSRodney W. Grimes 			break;
190358f0484fSRodney W. Grimes 		case OPLUS_:		/* things that don't break one */
190458f0484fSRodney W. Grimes 		case OLPAREN:
190558f0484fSRodney W. Grimes 		case ORPAREN:
190658f0484fSRodney W. Grimes 			break;
190758f0484fSRodney W. Grimes 		case OQUEST_:		/* things that must be skipped */
190858f0484fSRodney W. Grimes 		case OCH_:
190980f36366STim J. Robbins 			offset = altoffset(scan, offset);
191058f0484fSRodney W. Grimes 			scan--;
191158f0484fSRodney W. Grimes 			do {
191258f0484fSRodney W. Grimes 				scan += OPND(s);
191358f0484fSRodney W. Grimes 				s = *scan;
191458f0484fSRodney W. Grimes 				/* assert() interferes w debug printouts */
19154f8f1c79SKyle Evans 				if (OP(s) != (sop)O_QUEST &&
19164f8f1c79SKyle Evans 				    OP(s) != (sop)O_CH && OP(s) != (sop)OOR2) {
191758f0484fSRodney W. Grimes 					g->iflags |= BAD;
191858f0484fSRodney W. Grimes 					return;
191958f0484fSRodney W. Grimes 				}
19204f8f1c79SKyle Evans 			} while (OP(s) != (sop)O_QUEST && OP(s) != (sop)O_CH);
19217fed38d0SPhilippe Charnier 			/* FALLTHROUGH */
1922e6a886d8SDaniel C. Sobral 		case OBOW:		/* things that break a sequence */
1923e6a886d8SDaniel C. Sobral 		case OEOW:
1924e6a886d8SDaniel C. Sobral 		case OBOL:
1925e6a886d8SDaniel C. Sobral 		case OEOL:
1926ca53e5aeSKyle Evans 		case OBOS:
1927ca53e5aeSKyle Evans 		case OEOS:
19286b986646SKyle Evans 		case OWBND:
19296b986646SKyle Evans 		case ONWBND:
1930e6a886d8SDaniel C. Sobral 		case O_QUEST:
1931e6a886d8SDaniel C. Sobral 		case O_CH:
1932e6a886d8SDaniel C. Sobral 		case OEND:
19334f8f1c79SKyle Evans 			if (newlen > (sopno)g->mlen) {		/* ends one */
193458f0484fSRodney W. Grimes 				start = newstart;
193558f0484fSRodney W. Grimes 				g->mlen = newlen;
1936e6a886d8SDaniel C. Sobral 				if (offset > -1) {
1937e6a886d8SDaniel C. Sobral 					g->moffset += offset;
1938e6a886d8SDaniel C. Sobral 					offset = newlen;
1939e6a886d8SDaniel C. Sobral 				} else
1940e6a886d8SDaniel C. Sobral 					g->moffset = offset;
1941e6a886d8SDaniel C. Sobral 			} else {
1942e6a886d8SDaniel C. Sobral 				if (offset > -1)
1943e6a886d8SDaniel C. Sobral 					offset += newlen;
194458f0484fSRodney W. Grimes 			}
194558f0484fSRodney W. Grimes 			newlen = 0;
194658f0484fSRodney W. Grimes 			break;
1947e6a886d8SDaniel C. Sobral 		case OANY:
19484f8f1c79SKyle Evans 			if (newlen > (sopno)g->mlen) {		/* ends one */
1949e6a886d8SDaniel C. Sobral 				start = newstart;
1950e6a886d8SDaniel C. Sobral 				g->mlen = newlen;
1951e6a886d8SDaniel C. Sobral 				if (offset > -1) {
1952e6a886d8SDaniel C. Sobral 					g->moffset += offset;
1953e6a886d8SDaniel C. Sobral 					offset = newlen;
1954e6a886d8SDaniel C. Sobral 				} else
1955e6a886d8SDaniel C. Sobral 					g->moffset = offset;
1956e6a886d8SDaniel C. Sobral 			} else {
1957e6a886d8SDaniel C. Sobral 				if (offset > -1)
1958e6a886d8SDaniel C. Sobral 					offset += newlen;
1959e6a886d8SDaniel C. Sobral 			}
1960e6a886d8SDaniel C. Sobral 			if (offset > -1)
1961e6a886d8SDaniel C. Sobral 				offset++;
1962e6a886d8SDaniel C. Sobral 			newlen = 0;
1963e6a886d8SDaniel C. Sobral 			break;
1964e6a886d8SDaniel C. Sobral 		case OANYOF:		/* may or may not invalidate offset */
1965e6a886d8SDaniel C. Sobral 			/* First, everything as OANY */
19664f8f1c79SKyle Evans 			if (newlen > (sopno)g->mlen) {		/* ends one */
1967e6a886d8SDaniel C. Sobral 				start = newstart;
1968e6a886d8SDaniel C. Sobral 				g->mlen = newlen;
1969e6a886d8SDaniel C. Sobral 				if (offset > -1) {
1970e6a886d8SDaniel C. Sobral 					g->moffset += offset;
1971e6a886d8SDaniel C. Sobral 					offset = newlen;
1972e6a886d8SDaniel C. Sobral 				} else
1973e6a886d8SDaniel C. Sobral 					g->moffset = offset;
1974e6a886d8SDaniel C. Sobral 			} else {
1975e6a886d8SDaniel C. Sobral 				if (offset > -1)
1976e6a886d8SDaniel C. Sobral 					offset += newlen;
1977e6a886d8SDaniel C. Sobral 			}
1978e6a886d8SDaniel C. Sobral 			if (offset > -1)
1979e6a886d8SDaniel C. Sobral 				offset++;
1980e6a886d8SDaniel C. Sobral 			newlen = 0;
1981e6a886d8SDaniel C. Sobral 			break;
1982e5996857STim J. Robbins 		toohard:
1983e6a886d8SDaniel C. Sobral 		default:
1984e6a886d8SDaniel C. Sobral 			/* Anything here makes it impossible or too hard
1985e6a886d8SDaniel C. Sobral 			 * to calculate the offset -- so we give up;
1986e6a886d8SDaniel C. Sobral 			 * save the last known good offset, in case the
1987e6a886d8SDaniel C. Sobral 			 * must sequence doesn't occur later.
1988e6a886d8SDaniel C. Sobral 			 */
19894f8f1c79SKyle Evans 			if (newlen > (sopno)g->mlen) {		/* ends one */
1990e6a886d8SDaniel C. Sobral 				start = newstart;
1991e6a886d8SDaniel C. Sobral 				g->mlen = newlen;
1992e6a886d8SDaniel C. Sobral 				if (offset > -1)
1993e6a886d8SDaniel C. Sobral 					g->moffset += offset;
1994e6a886d8SDaniel C. Sobral 				else
1995e6a886d8SDaniel C. Sobral 					g->moffset = offset;
1996e6a886d8SDaniel C. Sobral 			}
1997e6a886d8SDaniel C. Sobral 			offset = -1;
1998e6a886d8SDaniel C. Sobral 			newlen = 0;
1999e6a886d8SDaniel C. Sobral 			break;
200058f0484fSRodney W. Grimes 		}
200158f0484fSRodney W. Grimes 	} while (OP(s) != OEND);
200258f0484fSRodney W. Grimes 
2003e6a886d8SDaniel C. Sobral 	if (g->mlen == 0) {		/* there isn't one */
2004e6a886d8SDaniel C. Sobral 		g->moffset = -1;
200558f0484fSRodney W. Grimes 		return;
2006e6a886d8SDaniel C. Sobral 	}
200758f0484fSRodney W. Grimes 
200858f0484fSRodney W. Grimes 	/* turn it into a character string */
200958f0484fSRodney W. Grimes 	g->must = malloc((size_t)g->mlen + 1);
201058f0484fSRodney W. Grimes 	if (g->must == NULL) {		/* argh; just forget it */
201158f0484fSRodney W. Grimes 		g->mlen = 0;
2012e6a886d8SDaniel C. Sobral 		g->moffset = -1;
201358f0484fSRodney W. Grimes 		return;
201458f0484fSRodney W. Grimes 	}
201558f0484fSRodney W. Grimes 	cp = g->must;
201658f0484fSRodney W. Grimes 	scan = start;
2017e5996857STim J. Robbins 	memset(&mbs, 0, sizeof(mbs));
2018e5996857STim J. Robbins 	while (cp < g->must + g->mlen) {
201958f0484fSRodney W. Grimes 		while (OP(s = *scan++) != OCHAR)
202058f0484fSRodney W. Grimes 			continue;
2021e5996857STim J. Robbins 		clen = wcrtomb(cp, OPND(s), &mbs);
2022e5996857STim J. Robbins 		assert(clen != (size_t)-1);
2023e5996857STim J. Robbins 		cp += clen;
202458f0484fSRodney W. Grimes 	}
202558f0484fSRodney W. Grimes 	assert(cp == g->must + g->mlen);
202658f0484fSRodney W. Grimes 	*cp++ = '\0';		/* just on general principles */
202758f0484fSRodney W. Grimes }
202858f0484fSRodney W. Grimes 
202958f0484fSRodney W. Grimes /*
2030e6a886d8SDaniel C. Sobral  - altoffset - choose biggest offset among multiple choices
203180f36366STim J. Robbins  == static int altoffset(sop *scan, int offset);
2032e6a886d8SDaniel C. Sobral  *
2033e6a886d8SDaniel C. Sobral  * Compute, recursively if necessary, the largest offset among multiple
2034e6a886d8SDaniel C. Sobral  * re paths.
2035e6a886d8SDaniel C. Sobral  */
2036e6a886d8SDaniel C. Sobral static int
203754a648d1SXin LI altoffset(sop *scan, int offset)
2038e6a886d8SDaniel C. Sobral {
2039e6a886d8SDaniel C. Sobral 	int largest;
2040e6a886d8SDaniel C. Sobral 	int try;
2041e6a886d8SDaniel C. Sobral 	sop s;
2042e6a886d8SDaniel C. Sobral 
2043e6a886d8SDaniel C. Sobral 	/* If we gave up already on offsets, return */
2044e6a886d8SDaniel C. Sobral 	if (offset == -1)
2045e6a886d8SDaniel C. Sobral 		return -1;
2046e6a886d8SDaniel C. Sobral 
2047e6a886d8SDaniel C. Sobral 	largest = 0;
2048e6a886d8SDaniel C. Sobral 	try = 0;
2049e6a886d8SDaniel C. Sobral 	s = *scan++;
20504f8f1c79SKyle Evans 	while (OP(s) != (sop)O_QUEST && OP(s) != (sop)O_CH) {
2051e6a886d8SDaniel C. Sobral 		switch (OP(s)) {
2052e6a886d8SDaniel C. Sobral 		case OOR1:
2053e6a886d8SDaniel C. Sobral 			if (try > largest)
2054e6a886d8SDaniel C. Sobral 				largest = try;
2055e6a886d8SDaniel C. Sobral 			try = 0;
2056e6a886d8SDaniel C. Sobral 			break;
2057e6a886d8SDaniel C. Sobral 		case OQUEST_:
2058e6a886d8SDaniel C. Sobral 		case OCH_:
205980f36366STim J. Robbins 			try = altoffset(scan, try);
2060e6a886d8SDaniel C. Sobral 			if (try == -1)
2061e6a886d8SDaniel C. Sobral 				return -1;
2062e6a886d8SDaniel C. Sobral 			scan--;
2063e6a886d8SDaniel C. Sobral 			do {
2064e6a886d8SDaniel C. Sobral 				scan += OPND(s);
2065e6a886d8SDaniel C. Sobral 				s = *scan;
20664f8f1c79SKyle Evans 				if (OP(s) != (sop)O_QUEST &&
20674f8f1c79SKyle Evans 				    OP(s) != (sop)O_CH && OP(s) != (sop)OOR2)
2068e6a886d8SDaniel C. Sobral 					return -1;
20694f8f1c79SKyle Evans 			} while (OP(s) != (sop)O_QUEST && OP(s) != (sop)O_CH);
20708f9e434fSDaniel C. Sobral 			/* We must skip to the next position, or we'll
20718f9e434fSDaniel C. Sobral 			 * leave altoffset() too early.
20728f9e434fSDaniel C. Sobral 			 */
20738f9e434fSDaniel C. Sobral 			scan++;
2074e6a886d8SDaniel C. Sobral 			break;
2075e6a886d8SDaniel C. Sobral 		case OANYOF:
2076e6a886d8SDaniel C. Sobral 		case OCHAR:
2077e6a886d8SDaniel C. Sobral 		case OANY:
2078e6a886d8SDaniel C. Sobral 			try++;
2079e6a886d8SDaniel C. Sobral 		case OBOW:
2080e6a886d8SDaniel C. Sobral 		case OEOW:
20816b986646SKyle Evans 		case OWBND:
20826b986646SKyle Evans 		case ONWBND:
2083e6a886d8SDaniel C. Sobral 		case OLPAREN:
2084e6a886d8SDaniel C. Sobral 		case ORPAREN:
2085e6a886d8SDaniel C. Sobral 		case OOR2:
2086e6a886d8SDaniel C. Sobral 			break;
2087e6a886d8SDaniel C. Sobral 		default:
2088e6a886d8SDaniel C. Sobral 			try = -1;
2089e6a886d8SDaniel C. Sobral 			break;
2090e6a886d8SDaniel C. Sobral 		}
2091e6a886d8SDaniel C. Sobral 		if (try == -1)
2092e6a886d8SDaniel C. Sobral 			return -1;
2093e6a886d8SDaniel C. Sobral 		s = *scan++;
2094e6a886d8SDaniel C. Sobral 	}
2095e6a886d8SDaniel C. Sobral 
2096e6a886d8SDaniel C. Sobral 	if (try > largest)
2097e6a886d8SDaniel C. Sobral 		largest = try;
2098e6a886d8SDaniel C. Sobral 
2099e6a886d8SDaniel C. Sobral 	return largest+offset;
2100e6a886d8SDaniel C. Sobral }
2101e6a886d8SDaniel C. Sobral 
2102e6a886d8SDaniel C. Sobral /*
21036049d9f0SDaniel C. Sobral  - computejumps - compute char jumps for BM scan
21048fb3f3f6SDavid E. O'Brien  == static void computejumps(struct parse *p, struct re_guts *g);
21056049d9f0SDaniel C. Sobral  *
21066049d9f0SDaniel C. Sobral  * This algorithm assumes g->must exists and is has size greater than
21076049d9f0SDaniel C. Sobral  * zero. It's based on the algorithm found on Computer Algorithms by
21086049d9f0SDaniel C. Sobral  * Sara Baase.
21096049d9f0SDaniel C. Sobral  *
21106049d9f0SDaniel C. Sobral  * A char jump is the number of characters one needs to jump based on
21116049d9f0SDaniel C. Sobral  * the value of the character from the text that was mismatched.
21126049d9f0SDaniel C. Sobral  */
21136049d9f0SDaniel C. Sobral static void
211454a648d1SXin LI computejumps(struct parse *p, struct re_guts *g)
21156049d9f0SDaniel C. Sobral {
21166049d9f0SDaniel C. Sobral 	int ch;
21176049d9f0SDaniel C. Sobral 	int mindex;
21186049d9f0SDaniel C. Sobral 
21196049d9f0SDaniel C. Sobral 	/* Avoid making errors worse */
21206049d9f0SDaniel C. Sobral 	if (p->error != 0)
21216049d9f0SDaniel C. Sobral 		return;
21226049d9f0SDaniel C. Sobral 
2123e2a87ae3SYuri Pankov 	g->charjump = (int *)malloc((NC_MAX + 1) * sizeof(int));
21246049d9f0SDaniel C. Sobral 	if (g->charjump == NULL)	/* Not a fatal error */
21256049d9f0SDaniel C. Sobral 		return;
2126c5e125bbSDaniel C. Sobral 	/* Adjust for signed chars, if necessary */
2127e2a87ae3SYuri Pankov 	g->charjump = &g->charjump[-(CHAR_MIN)];
21286049d9f0SDaniel C. Sobral 
21296049d9f0SDaniel C. Sobral 	/* If the character does not exist in the pattern, the jump
21306049d9f0SDaniel C. Sobral 	 * is equal to the number of characters in the pattern.
21316049d9f0SDaniel C. Sobral 	 */
2132e2a87ae3SYuri Pankov 	for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++)
21336049d9f0SDaniel C. Sobral 		g->charjump[ch] = g->mlen;
21346049d9f0SDaniel C. Sobral 
21356049d9f0SDaniel C. Sobral 	/* If the character does exist, compute the jump that would
21366049d9f0SDaniel C. Sobral 	 * take us to the last character in the pattern equal to it
21376049d9f0SDaniel C. Sobral 	 * (notice that we match right to left, so that last character
21386049d9f0SDaniel C. Sobral 	 * is the first one that would be matched).
21396049d9f0SDaniel C. Sobral 	 */
21406049d9f0SDaniel C. Sobral 	for (mindex = 0; mindex < g->mlen; mindex++)
2141e0554a53SJacques Vidrine 		g->charjump[(int)g->must[mindex]] = g->mlen - mindex - 1;
21426049d9f0SDaniel C. Sobral }
21436049d9f0SDaniel C. Sobral 
21446049d9f0SDaniel C. Sobral /*
21456049d9f0SDaniel C. Sobral  - computematchjumps - compute match jumps for BM scan
21468fb3f3f6SDavid E. O'Brien  == static void computematchjumps(struct parse *p, struct re_guts *g);
21476049d9f0SDaniel C. Sobral  *
21486049d9f0SDaniel C. Sobral  * This algorithm assumes g->must exists and is has size greater than
21496049d9f0SDaniel C. Sobral  * zero. It's based on the algorithm found on Computer Algorithms by
21506049d9f0SDaniel C. Sobral  * Sara Baase.
21516049d9f0SDaniel C. Sobral  *
21526049d9f0SDaniel C. Sobral  * A match jump is the number of characters one needs to advance based
21536049d9f0SDaniel C. Sobral  * on the already-matched suffix.
21546049d9f0SDaniel C. Sobral  * Notice that all values here are minus (g->mlen-1), because of the way
21556049d9f0SDaniel C. Sobral  * the search algorithm works.
21566049d9f0SDaniel C. Sobral  */
21576049d9f0SDaniel C. Sobral static void
215854a648d1SXin LI computematchjumps(struct parse *p, struct re_guts *g)
21596049d9f0SDaniel C. Sobral {
21606049d9f0SDaniel C. Sobral 	int mindex;		/* General "must" iterator */
21616049d9f0SDaniel C. Sobral 	int suffix;		/* Keeps track of matching suffix */
21626049d9f0SDaniel C. Sobral 	int ssuffix;		/* Keeps track of suffixes' suffix */
21636049d9f0SDaniel C. Sobral 	int* pmatches;		/* pmatches[k] points to the next i
21646049d9f0SDaniel C. Sobral 				 * such that i+1...mlen is a substring
21656049d9f0SDaniel C. Sobral 				 * of k+1...k+mlen-i-1
21666049d9f0SDaniel C. Sobral 				 */
21676049d9f0SDaniel C. Sobral 
21686049d9f0SDaniel C. Sobral 	/* Avoid making errors worse */
21696049d9f0SDaniel C. Sobral 	if (p->error != 0)
21706049d9f0SDaniel C. Sobral 		return;
21716049d9f0SDaniel C. Sobral 
217212eb0bcaSPedro F. Giffuni 	pmatches = (int*) malloc(g->mlen * sizeof(int));
21736049d9f0SDaniel C. Sobral 	if (pmatches == NULL) {
21746049d9f0SDaniel C. Sobral 		g->matchjump = NULL;
21756049d9f0SDaniel C. Sobral 		return;
21766049d9f0SDaniel C. Sobral 	}
21776049d9f0SDaniel C. Sobral 
217812eb0bcaSPedro F. Giffuni 	g->matchjump = (int*) malloc(g->mlen * sizeof(int));
2179cfbebadcSXin LI 	if (g->matchjump == NULL) {	/* Not a fatal error */
2180cfbebadcSXin LI 		free(pmatches);
21816049d9f0SDaniel C. Sobral 		return;
2182cfbebadcSXin LI 	}
21836049d9f0SDaniel C. Sobral 
21846049d9f0SDaniel C. Sobral 	/* Set maximum possible jump for each character in the pattern */
21856049d9f0SDaniel C. Sobral 	for (mindex = 0; mindex < g->mlen; mindex++)
21866049d9f0SDaniel C. Sobral 		g->matchjump[mindex] = 2*g->mlen - mindex - 1;
21876049d9f0SDaniel C. Sobral 
21886049d9f0SDaniel C. Sobral 	/* Compute pmatches[] */
21896049d9f0SDaniel C. Sobral 	for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0;
21906049d9f0SDaniel C. Sobral 	    mindex--, suffix--) {
21916049d9f0SDaniel C. Sobral 		pmatches[mindex] = suffix;
21926049d9f0SDaniel C. Sobral 
21936049d9f0SDaniel C. Sobral 		/* If a mismatch is found, interrupting the substring,
21946049d9f0SDaniel C. Sobral 		 * compute the matchjump for that position. If no
21956049d9f0SDaniel C. Sobral 		 * mismatch is found, then a text substring mismatched
21966049d9f0SDaniel C. Sobral 		 * against the suffix will also mismatch against the
21976049d9f0SDaniel C. Sobral 		 * substring.
21986049d9f0SDaniel C. Sobral 		 */
21996049d9f0SDaniel C. Sobral 		while (suffix < g->mlen
22006049d9f0SDaniel C. Sobral 		    && g->must[mindex] != g->must[suffix]) {
22016049d9f0SDaniel C. Sobral 			g->matchjump[suffix] = MIN(g->matchjump[suffix],
22026049d9f0SDaniel C. Sobral 			    g->mlen - mindex - 1);
22036049d9f0SDaniel C. Sobral 			suffix = pmatches[suffix];
22046049d9f0SDaniel C. Sobral 		}
22056049d9f0SDaniel C. Sobral 	}
22066049d9f0SDaniel C. Sobral 
22076049d9f0SDaniel C. Sobral 	/* Compute the matchjump up to the last substring found to jump
22086049d9f0SDaniel C. Sobral 	 * to the beginning of the largest must pattern prefix matching
22096049d9f0SDaniel C. Sobral 	 * it's own suffix.
22106049d9f0SDaniel C. Sobral 	 */
22116049d9f0SDaniel C. Sobral 	for (mindex = 0; mindex <= suffix; mindex++)
22126049d9f0SDaniel C. Sobral 		g->matchjump[mindex] = MIN(g->matchjump[mindex],
22136049d9f0SDaniel C. Sobral 		    g->mlen + suffix - mindex);
22146049d9f0SDaniel C. Sobral 
22156049d9f0SDaniel C. Sobral         ssuffix = pmatches[suffix];
22166049d9f0SDaniel C. Sobral         while (suffix < g->mlen) {
22179868274bSDaniel C. Sobral                 while (suffix <= ssuffix && suffix < g->mlen) {
22186049d9f0SDaniel C. Sobral                         g->matchjump[suffix] = MIN(g->matchjump[suffix],
22196049d9f0SDaniel C. Sobral 			    g->mlen + ssuffix - suffix);
22206049d9f0SDaniel C. Sobral                         suffix++;
22216049d9f0SDaniel C. Sobral                 }
22228e2f75b8SDaniel C. Sobral 		if (suffix < g->mlen)
22236049d9f0SDaniel C. Sobral                 	ssuffix = pmatches[ssuffix];
22246049d9f0SDaniel C. Sobral         }
22256049d9f0SDaniel C. Sobral 
22266049d9f0SDaniel C. Sobral 	free(pmatches);
22276049d9f0SDaniel C. Sobral }
22286049d9f0SDaniel C. Sobral 
22296049d9f0SDaniel C. Sobral /*
223058f0484fSRodney W. Grimes  - pluscount - count + nesting
22318fb3f3f6SDavid E. O'Brien  == static sopno pluscount(struct parse *p, struct re_guts *g);
223258f0484fSRodney W. Grimes  */
223358f0484fSRodney W. Grimes static sopno			/* nesting depth */
223454a648d1SXin LI pluscount(struct parse *p, struct re_guts *g)
223558f0484fSRodney W. Grimes {
22368fb3f3f6SDavid E. O'Brien 	sop *scan;
22378fb3f3f6SDavid E. O'Brien 	sop s;
22388fb3f3f6SDavid E. O'Brien 	sopno plusnest = 0;
22398fb3f3f6SDavid E. O'Brien 	sopno maxnest = 0;
224058f0484fSRodney W. Grimes 
224158f0484fSRodney W. Grimes 	if (p->error != 0)
224258f0484fSRodney W. Grimes 		return(0);	/* there may not be an OEND */
224358f0484fSRodney W. Grimes 
224458f0484fSRodney W. Grimes 	scan = g->strip + 1;
224558f0484fSRodney W. Grimes 	do {
224658f0484fSRodney W. Grimes 		s = *scan++;
224758f0484fSRodney W. Grimes 		switch (OP(s)) {
224858f0484fSRodney W. Grimes 		case OPLUS_:
224958f0484fSRodney W. Grimes 			plusnest++;
225058f0484fSRodney W. Grimes 			break;
225158f0484fSRodney W. Grimes 		case O_PLUS:
225258f0484fSRodney W. Grimes 			if (plusnest > maxnest)
225358f0484fSRodney W. Grimes 				maxnest = plusnest;
225458f0484fSRodney W. Grimes 			plusnest--;
225558f0484fSRodney W. Grimes 			break;
225658f0484fSRodney W. Grimes 		}
225758f0484fSRodney W. Grimes 	} while (OP(s) != OEND);
225858f0484fSRodney W. Grimes 	if (plusnest != 0)
225958f0484fSRodney W. Grimes 		g->iflags |= BAD;
226058f0484fSRodney W. Grimes 	return(maxnest);
226158f0484fSRodney W. Grimes }
2262