1 /*
2  * the outer shell of regexec()
3  *
4  * This file includes engine.c *twice*, after muchos fiddling with the
5  * macros that code uses.  This lets the same code operate on two different
6  * representations for state sets.
7  */
8 #include <my_global.h>
9 #include <m_string.h>
10 #include <m_ctype.h>
11 #ifdef _WIN32
12 #include  <limits.h>
13 #endif
14 #include "my_regex.h"
15 #include "utils.h"
16 #include "regex2.h"
17 
/* for use in asserts */
#define nope 0

/*
 * Macros for manipulating states, small version.
 *
 * A state set is a bit mask stored in one long: bit n is set when state n
 * belongs to the set.  Every shift is carried out on the unsigned
 * counterpart of the type, so setting, clearing or testing the topmost bit
 * never shifts a 1 into (or out of) the sign bit of a signed value, which
 * would be undefined behaviour.
 */
#define	states	long
/*
 * states1 is not undone together with the other small-version macros;
 * my_regexec() uses sizeof(states1) when choosing a matcher.
 * Ensure Win64 definition is correct.
 */
#define	states1	long
#define	CLEAR(v)	((v) = 0)
#define	SET0(v, n)	((v) &= ~((unsigned states) 1 << (n)))
#define	SET1(v, n)	((v) |= (unsigned states) 1 << (n))
#define	ISSET(v, n)	((v) & ((unsigned states) 1 << (n)))
#define	ASSIGN(d, s)	((d) = (s))
#define	EQ(a, b)	((a) == (b))
#define	STATEVARS	int dummy	/* dummy version */
#define	STATESETUP(m, n)	/* nothing */
#define	STATETEARDOWN(m)	/* nothing */
#define	SETUP(v)	((v) = 0)
#define	onestate	long			/* Changed from int by Monty */
#define	INIT(o, n)	((o) = (unsigned states)1 << (n))
/* shift as unsigned: the single bit may move into or past the top bit */
#define	INC(o)	((o) = (onestate)((unsigned states)(o) << 1))
#define	ISSTATEIN(v, o)	((v) & (o))
/* some abbreviations; note that some of these know variable names! */
/* do "if I'm here, I can also be there" etc without branches */
#define	FWD(dst, src, n)	((dst) |= ((unsigned states)(src)&(here)) << (n))
#define	BACK(dst, src, n)	((dst) |= ((unsigned states)(src)&(here)) >> (n))
#define	ISSETBACK(v, n)	((v) & ((unsigned states)here >> (n)))
/* function names */
#define SNAMES			/* engine.c looks after details */
45 
46 #include "engine.c"
47 
/*
 * Retire the small-version definitions so that the large version can
 * define the same names again.  states1 is deliberately left defined.
 */

/* the set type and the single-state type */
#undef	states
#undef	onestate

/* per-matcher storage hooks */
#undef	STATEVARS
#undef	STATESETUP
#undef	STATETEARDOWN
#undef	SETUP

/* whole-set operations */
#undef	CLEAR
#undef	ASSIGN
#undef	EQ

/* single-state operations */
#undef	SET0
#undef	SET1
#undef	ISSET
#undef	INIT
#undef	INC
#undef	ISSTATEIN

/* the "here"-relative abbreviations */
#undef	FWD
#undef	BACK
#undef	ISSETBACK

/* function-name selector */
#undef	SNAMES
68 
/*
 * Macros for manipulating states, large version.
 *
 * A state set is an array of m->g->nstates bytes, one byte per state
 * (non-zero means the state is in the set).  Several macros refer to the
 * variable `m' directly, so it must be in scope wherever they are used.
 */
#define	states		char *
#define	onestate	int

/* whole-set operations */
#define	CLEAR(v)	memset((v), 0, m->g->nstates)
#define	ASSIGN(d, s)	memcpy((d), (s), m->g->nstates)
#define	EQ(a, b)	(memcmp((a), (b), m->g->nstates) == 0)

/* single-state operations, by state number */
#define	SET0(v, n)	((v)[(n)] = 0)
#define	SET1(v, n)	((v)[(n)] = 1)
#define	ISSET(v, n)	((v)[(n)])

/* single-state operations, by onestate cursor */
#define	INIT(o, n)	((o) = (n))
#define	INC(o)		((o)++)
#define	ISSTATEIN(v, o)	((v)[(o)])

/*
 * Storage: STATESETUP allocates one buffer big enough for nv sets (and
 * returns MY_REG_ESPACE from the enclosing function when malloc fails),
 * SETUP hands out the next unused set from that buffer, and STATETEARDOWN
 * frees the buffer.
 */
#define	STATEVARS	int vn; char *space
#define	STATESETUP(m, nv)	{ \
				(m)->space = malloc((nv)*(m)->g->nstates); \
				if ((m)->space == NULL) \
					return(MY_REG_ESPACE); \
				(m)->vn = 0; \
				}
#define	STATETEARDOWN(m)	{ free((m)->space); }
#define	SETUP(v)	((v) = &m->space[m->vn++ * m->g->nstates])

/* some abbreviations; note that these know the variable name `here'! */
/* do "if I'm here, I can also be there" etc without branches */
#define	FWD(dst, src, n)	((dst)[here + (n)] |= (src)[here])
#define	BACK(dst, src, n)	((dst)[here - (n)] |= (src)[here])
#define	ISSETBACK(v, n)	((v)[here - (n)])

/* function names */
#define	LNAMES			/* flag */
94 
95 #include "engine.c"
96 
97 /*
98  - regexec - interface for matching
99  = extern int regexec(const regex_t *, const char *, size_t, \
100  =					regmatch_t [], int);
101  = #define	MY_REG_NOTBOL	00001
102  = #define	MY_REG_NOTEOL	00002
103  = #define	MY_REG_STARTEND	00004
104  = #define	MY_REG_TRACE	00400	// tracing of execution
105  = #define	MY_REG_LARGE	01000	// force large representation
106  = #define	MY_REG_BACKR	02000	// force use of backref code
107  *
108  * We put this here so we can exploit knowledge of the state representation
109  * when choosing which matcher to call.  Also, by this point the matchers
110  * have been prototyped.
111  */
112 
113 /**
114   my_regexec matches the compiled RE pointed to by preg against the
115   string, subject to the flags in eflags, and reports results using
116   nmatch, pmatch, and the returned value.  The RE must have been
117   compiled by a previous invocation of my_regcomp.
118 
  By default, the NUL-terminated string pointed to by str is
  considered to be the text of an entire line, minus any terminating
  newline.  The eflags argument is the bitwise OR of zero or more of
  the following flags:
123 
124   MY_REG_NOTBOL   The first character of the string is not the beginning of
125                   a line, so the `^' anchor should not match before it.
126                   This does not affect the behavior of newlines under
127                   MY_REG_NEWLINE.
128 
  MY_REG_NOTEOL   The NUL terminating the string does not end a line, so the
                  `$' anchor should not match before it. This does not affect
                  the behavior of newlines under MY_REG_NEWLINE.
132 
133   MY_REG_STARTEND The string is considered to start at string +
134                   pmatch[0].rm_so and to have a terminating NUL located
135                   at string + pmatch[0].rm_eo (there need not actually be
136                   a NUL at that location), regardless of the value of nmatch.
137 
  @return 0 on success, MY_REG_NOMATCH if there is no match,
          MY_REG_BADPAT if preg does not hold a valid compiled RE
139  */
140 int
my_regexec(preg,str,nmatch,pmatch,eflags)141 my_regexec(preg, str, nmatch, pmatch, eflags)
142 const my_regex_t *preg;
143 const char *str;
144 size_t nmatch;
145 my_regmatch_t pmatch[];
146 int eflags;
147 {
148 	char *pstr = (char *) str;
149 	struct re_guts *g = preg->re_g;
150 #ifdef REDEBUG
151 #	define	GOODFLAGS(f)	(f)
152 #else
153 #	define	GOODFLAGS(f)	((f)&(MY_REG_NOTBOL|MY_REG_NOTEOL|MY_REG_STARTEND))
154 #endif
155 
156 	if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
157 		return(MY_REG_BADPAT);
158 	assert(!(g->iflags&BAD));
159 	if (g->iflags&BAD)		/* backstop for no-debug case */
160 		return(MY_REG_BADPAT);
161 	eflags = GOODFLAGS(eflags);
162 
163 	if ((size_t) g->nstates <= CHAR_BIT*sizeof(states1) &&
164 	    !(eflags&MY_REG_LARGE))
165 		return(smatcher(preg->charset, g, pstr, nmatch, pmatch, eflags));
166 	else
167 		return(lmatcher(preg->charset, g, pstr, nmatch, pmatch, eflags));
168 }
169