10b57cec5SDimitry Andric /*-
20b57cec5SDimitry Andric  * This code is derived from OpenBSD's libc/regex, original license follows:
30b57cec5SDimitry Andric  *
40b57cec5SDimitry Andric  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
50b57cec5SDimitry Andric  * Copyright (c) 1992, 1993, 1994
60b57cec5SDimitry Andric  *	The Regents of the University of California.  All rights reserved.
70b57cec5SDimitry Andric  *
80b57cec5SDimitry Andric  * This code is derived from software contributed to Berkeley by
90b57cec5SDimitry Andric  * Henry Spencer.
100b57cec5SDimitry Andric  *
110b57cec5SDimitry Andric  * Redistribution and use in source and binary forms, with or without
120b57cec5SDimitry Andric  * modification, are permitted provided that the following conditions
130b57cec5SDimitry Andric  * are met:
140b57cec5SDimitry Andric  * 1. Redistributions of source code must retain the above copyright
150b57cec5SDimitry Andric  *    notice, this list of conditions and the following disclaimer.
160b57cec5SDimitry Andric  * 2. Redistributions in binary form must reproduce the above copyright
170b57cec5SDimitry Andric  *    notice, this list of conditions and the following disclaimer in the
180b57cec5SDimitry Andric  *    documentation and/or other materials provided with the distribution.
190b57cec5SDimitry Andric  * 3. Neither the name of the University nor the names of its contributors
200b57cec5SDimitry Andric  *    may be used to endorse or promote products derived from this software
210b57cec5SDimitry Andric  *    without specific prior written permission.
220b57cec5SDimitry Andric  *
230b57cec5SDimitry Andric  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
240b57cec5SDimitry Andric  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
250b57cec5SDimitry Andric  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
260b57cec5SDimitry Andric  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
270b57cec5SDimitry Andric  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
280b57cec5SDimitry Andric  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
290b57cec5SDimitry Andric  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
300b57cec5SDimitry Andric  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
310b57cec5SDimitry Andric  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
320b57cec5SDimitry Andric  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
330b57cec5SDimitry Andric  * SUCH DAMAGE.
340b57cec5SDimitry Andric  *
350b57cec5SDimitry Andric  *	@(#)regexec.c	8.3 (Berkeley) 3/20/94
360b57cec5SDimitry Andric  */
370b57cec5SDimitry Andric 
380b57cec5SDimitry Andric /*
390b57cec5SDimitry Andric  * the outer shell of llvm_regexec()
400b57cec5SDimitry Andric  *
410b57cec5SDimitry Andric  * This file includes engine.inc *twice*, after muchos fiddling with the
420b57cec5SDimitry Andric  * macros that code uses.  This lets the same code operate on two different
430b57cec5SDimitry Andric  * representations for state sets.
440b57cec5SDimitry Andric  */
450b57cec5SDimitry Andric #include <sys/types.h>
460b57cec5SDimitry Andric #include <stdio.h>
470b57cec5SDimitry Andric #include <stdlib.h>
480b57cec5SDimitry Andric #include <string.h>
490b57cec5SDimitry Andric #include <limits.h>
500b57cec5SDimitry Andric #include <ctype.h>
510b57cec5SDimitry Andric #include "regex_impl.h"
520b57cec5SDimitry Andric 
530b57cec5SDimitry Andric #include "regutils.h"
540b57cec5SDimitry Andric #include "regex2.h"
550b57cec5SDimitry Andric 
560b57cec5SDimitry Andric /* macros for manipulating states, small version */
570b57cec5SDimitry Andric /* FIXME: 'states' is assumed as 'long' on small version. */
580b57cec5SDimitry Andric #define	states1	long		/* for later use in llvm_regexec() decision */
590b57cec5SDimitry Andric #define	states	states1
600b57cec5SDimitry Andric #define	CLEAR(v)	((v) = 0)
610b57cec5SDimitry Andric #define	SET0(v, n)	((v) &= ~((unsigned long)1 << (n)))
620b57cec5SDimitry Andric #define	SET1(v, n)	((v) |= (unsigned long)1 << (n))
630b57cec5SDimitry Andric #define	ISSET(v, n)	(((v) & ((unsigned long)1 << (n))) != 0)
640b57cec5SDimitry Andric #define	ASSIGN(d, s)	((d) = (s))
650b57cec5SDimitry Andric #define	EQ(a, b)	((a) == (b))
660b57cec5SDimitry Andric #define	STATEVARS	long dummy	/* dummy version */
670b57cec5SDimitry Andric #define	STATESETUP(m, n)	/* nothing */
680b57cec5SDimitry Andric #define	STATETEARDOWN(m)	/* nothing */
690b57cec5SDimitry Andric #define	SETUP(v)	((v) = 0)
700b57cec5SDimitry Andric #define	onestate	long
710b57cec5SDimitry Andric #define	INIT(o, n)	((o) = (unsigned long)1 << (n))
720b57cec5SDimitry Andric #define	INC(o)		((o) = (unsigned long)(o) << 1)
730b57cec5SDimitry Andric #define	ISSTATEIN(v, o)	(((v) & (o)) != 0)
740b57cec5SDimitry Andric /* some abbreviations; note that some of these know variable names! */
750b57cec5SDimitry Andric /* do "if I'm here, I can also be there" etc without branches */
760b57cec5SDimitry Andric #define	FWD(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) << (n))
770b57cec5SDimitry Andric #define	BACK(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) >> (n))
780b57cec5SDimitry Andric #define	ISSETBACK(v, n)		(((v) & ((unsigned long)here >> (n))) != 0)
790b57cec5SDimitry Andric /* function names */
800b57cec5SDimitry Andric #define SNAMES			/* engine.inc looks after details */
810b57cec5SDimitry Andric 
820b57cec5SDimitry Andric #include "regengine.inc"
830b57cec5SDimitry Andric 
840b57cec5SDimitry Andric /* now undo things */
850b57cec5SDimitry Andric #undef	states
860b57cec5SDimitry Andric #undef	CLEAR
870b57cec5SDimitry Andric #undef	SET0
880b57cec5SDimitry Andric #undef	SET1
890b57cec5SDimitry Andric #undef	ISSET
900b57cec5SDimitry Andric #undef	ASSIGN
910b57cec5SDimitry Andric #undef	EQ
920b57cec5SDimitry Andric #undef	STATEVARS
930b57cec5SDimitry Andric #undef	STATESETUP
940b57cec5SDimitry Andric #undef	STATETEARDOWN
950b57cec5SDimitry Andric #undef	SETUP
960b57cec5SDimitry Andric #undef	onestate
970b57cec5SDimitry Andric #undef	INIT
980b57cec5SDimitry Andric #undef	INC
990b57cec5SDimitry Andric #undef	ISSTATEIN
1000b57cec5SDimitry Andric #undef	FWD
1010b57cec5SDimitry Andric #undef	BACK
1020b57cec5SDimitry Andric #undef	ISSETBACK
1030b57cec5SDimitry Andric #undef	SNAMES
1040b57cec5SDimitry Andric 
1050b57cec5SDimitry Andric /* macros for manipulating states, large version */
1060b57cec5SDimitry Andric #define	states	char *
1070b57cec5SDimitry Andric #define	CLEAR(v)	memset(v, 0, m->g->nstates)
1080b57cec5SDimitry Andric #define	SET0(v, n)	((v)[n] = 0)
1090b57cec5SDimitry Andric #define	SET1(v, n)	((v)[n] = 1)
1100b57cec5SDimitry Andric #define	ISSET(v, n)	((v)[n])
1110b57cec5SDimitry Andric #define	ASSIGN(d, s)	memmove(d, s, m->g->nstates)
1120b57cec5SDimitry Andric #define	EQ(a, b)	(memcmp(a, b, m->g->nstates) == 0)
1130b57cec5SDimitry Andric #define	STATEVARS	long vn; char *space
1140b57cec5SDimitry Andric #define	STATESETUP(m, nv)	{ (m)->space = malloc((nv)*(m)->g->nstates); \
1150b57cec5SDimitry Andric 				if ((m)->space == NULL) return(REG_ESPACE); \
1160b57cec5SDimitry Andric 				(m)->vn = 0; }
1170b57cec5SDimitry Andric #define	STATETEARDOWN(m)	{ free((m)->space); }
1180b57cec5SDimitry Andric #define	SETUP(v)	((v) = &m->space[m->vn++ * m->g->nstates])
1190b57cec5SDimitry Andric #define	onestate	long
1200b57cec5SDimitry Andric #define	INIT(o, n)	((o) = (n))
1210b57cec5SDimitry Andric #define	INC(o)	((o)++)
1220b57cec5SDimitry Andric #define	ISSTATEIN(v, o)	((v)[o])
1230b57cec5SDimitry Andric /* some abbreviations; note that some of these know variable names! */
1240b57cec5SDimitry Andric /* do "if I'm here, I can also be there" etc without branches */
1250b57cec5SDimitry Andric #define	FWD(dst, src, n)	((dst)[here+(n)] |= (src)[here])
1260b57cec5SDimitry Andric #define	BACK(dst, src, n)	((dst)[here-(n)] |= (src)[here])
1270b57cec5SDimitry Andric #define	ISSETBACK(v, n)	((v)[here - (n)])
1280b57cec5SDimitry Andric /* function names */
1290b57cec5SDimitry Andric #define	LNAMES			/* flag */
1300b57cec5SDimitry Andric 
1310b57cec5SDimitry Andric #include "regengine.inc"
1320b57cec5SDimitry Andric 
1330b57cec5SDimitry Andric /*
1340b57cec5SDimitry Andric  - llvm_regexec - interface for matching
1350b57cec5SDimitry Andric  *
1360b57cec5SDimitry Andric  * We put this here so we can exploit knowledge of the state representation
1370b57cec5SDimitry Andric  * when choosing which matcher to call.  Also, by this point the matchers
1380b57cec5SDimitry Andric  * have been prototyped.
1390b57cec5SDimitry Andric  */
1400b57cec5SDimitry Andric int				/* 0 success, REG_NOMATCH failure */
llvm_regexec(const llvm_regex_t * preg,const char * string,size_t nmatch,llvm_regmatch_t pmatch[],int eflags)1410b57cec5SDimitry Andric llvm_regexec(const llvm_regex_t *preg, const char *string, size_t nmatch,
1420b57cec5SDimitry Andric              llvm_regmatch_t pmatch[], int eflags)
1430b57cec5SDimitry Andric {
1440b57cec5SDimitry Andric 	struct re_guts *g = preg->re_g;
1450b57cec5SDimitry Andric #ifdef REDEBUG
1460b57cec5SDimitry Andric #	define	GOODFLAGS(f)	(f)
1470b57cec5SDimitry Andric #else
1480b57cec5SDimitry Andric #	define	GOODFLAGS(f)	((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
1490b57cec5SDimitry Andric #endif
1500b57cec5SDimitry Andric 
1510b57cec5SDimitry Andric 	if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
1520b57cec5SDimitry Andric 		return(REG_BADPAT);
1530b57cec5SDimitry Andric 	assert(!(g->iflags&REGEX_BAD));
1540b57cec5SDimitry Andric 	if (g->iflags&REGEX_BAD)		/* backstop for no-debug case */
1550b57cec5SDimitry Andric 		return(REG_BADPAT);
1560b57cec5SDimitry Andric 	eflags = GOODFLAGS(eflags);
1570b57cec5SDimitry Andric 
1580b57cec5SDimitry Andric 	if (g->nstates <= (long)(CHAR_BIT*sizeof(states1)) && !(eflags&REG_LARGE))
1590b57cec5SDimitry Andric 		return(smatcher(g, string, nmatch, pmatch, eflags));
1600b57cec5SDimitry Andric 	else
1610b57cec5SDimitry Andric 		return(lmatcher(g, string, nmatch, pmatch, eflags));
1620b57cec5SDimitry Andric }
163