1 /*
2 * the outer shell of regexec()
3 *
 * This file includes engine.c *twice*, each time after redefining the
 * macros that code uses.  This lets the same code operate on two different
 * representations for state sets: a bitmask held in a single long (small)
 * and an array with one char per state (large).
7 */
8 #include <my_global.h>
9 #include <m_string.h>
10 #include <m_ctype.h>
11 #ifdef _WIN32
12 #include <limits.h>
13 #endif
14 #include "my_regex.h"
15 #include "utils.h"
16 #include "regex2.h"
17
/* for use in asserts */
#define nope 0

/*
 * Macros for manipulating states, small version.
 *
 * A state set is a bitmask held in one long: state n is bit n.  All
 * single-bit masks are built from an *unsigned* 1, because my_regexec()
 * selects this representation for up to CHAR_BIT*sizeof(long) states and
 * shifting a signed 1 into the sign bit is undefined behaviour.
 */
#define states		long
/*
 * Type whose width my_regexec() uses to decide between the two matchers.
 * It is spelled out as long rather than as `states': macros are expanded
 * at the point of use, and by then `states' has been redefined to char *,
 * whose size differs from long on LLP64 targets such as Win64.
 */
#define states1		long
#define CLEAR(v)	((v) = 0)
#define SET0(v, n)	((v) &= ~((unsigned states)1 << (n)))
#define SET1(v, n)	((v) |= (unsigned states)1 << (n))
#define ISSET(v, n)	((v) & ((unsigned states)1 << (n)))
#define ASSIGN(d, s)	((d) = (s))
#define EQ(a, b)	((a) == (b))
#define STATEVARS	int dummy	/* dummy version */
#define STATESETUP(m, n)	/* nothing */
#define STATETEARDOWN(m)	/* nothing */
#define SETUP(v)	((v) = 0)
/* A single state is a one-bit mask, so it needs the full width of a set. */
#define onestate	long		/* Changed from int by Monty */
#define INIT(o, n)	((o) = (unsigned states)1 << (n))
/*
 * NOTE(review): INC still left-shifts a signed long; if a onestate can ever
 * hold the top bit this has the same sign-bit problem -- confirm against
 * the loops in engine.c.
 */
#define INC(o)		((o) <<= 1)
#define ISSTATEIN(v, o)	((v) & (o))
/* some abbreviations; note that some of these know variable names! */
/* do "if I'm here, I can also be there" etc without branches */
/* (`here' is a onestate variable that must be in scope at the use site) */
#define FWD(dst, src, n)	((dst) |= ((unsigned states)(src)&(here)) << (n))
#define BACK(dst, src, n)	((dst) |= ((unsigned states)(src)&(here)) >> (n))
#define ISSETBACK(v, n)	((v) & ((unsigned states)here >> (n)))
/* function names */
#define SNAMES			/* engine.c looks after details */
45
46 #include "engine.c"
47
/*
 * Retire every small-version macro so that each one can be given its
 * large-version meaning before engine.c is included a second time.
 * states1 and nope are not undefined here; my_regexec() still refers to
 * states1.
 */

/* representation types */
#undef states
#undef onestate

/* whole-set operations */
#undef CLEAR
#undef ASSIGN
#undef EQ
#undef SETUP

/* single-member operations */
#undef SET0
#undef SET1
#undef ISSET
#undef INIT
#undef INC
#undef ISSTATEIN

/* per-match storage management */
#undef STATEVARS
#undef STATESETUP
#undef STATETEARDOWN

/* shortcuts built around `here' */
#undef FWD
#undef BACK
#undef ISSETBACK

/* function-name selector */
#undef SNAMES
68
/*
 * Macros for manipulating states, large version.
 *
 * A state set is an array of m->g->nstates chars, one per state; a nonzero
 * element means the state is a member.  A single state (onestate) is simply
 * its index.  The whole-set macros and SETUP refer to a variable named `m'
 * at the point of use rather than taking it as an argument.
 */
#define states			char *
#define CLEAR(set)		memset((set), 0, m->g->nstates)
#define SET0(set, idx)		((set)[(idx)] = 0)
#define SET1(set, idx)		((set)[(idx)] = 1)
#define ISSET(set, idx)		((set)[(idx)])
#define ASSIGN(to, from)	memcpy((to), (from), m->g->nstates)
#define EQ(x, y)		(!memcmp((x), (y), m->g->nstates))
/* extra members: number of sets handed out so far, and their backing store */
#define STATEVARS		int vn; char *space
/*
 * Allocate room for `count' state sets in one piece.  On allocation
 * failure this returns MY_REG_ESPACE from the function that uses the macro.
 */
#define STATESETUP(mp, count) { \
		(mp)->space = malloc((count) * (mp)->g->nstates); \
		if (NULL == (mp)->space) \
			return(MY_REG_ESPACE); \
		(mp)->vn = 0; \
	}
#define STATETEARDOWN(mp)	{ free((mp)->space); }
/* hand out the next unused nstates-sized slice of m->space */
#define SETUP(set)		((set) = &m->space[m->vn++ * m->g->nstates])
#define onestate		int
#define INIT(one, idx)		((one) = (idx))
#define INC(one)		((one)++)
#define ISSTATEIN(set, one)	((set)[(one)])
/* some abbreviations; note that some of these know variable names! */
/* do "if I'm here, I can also be there" etc without branches */
#define FWD(to, from, dist)	((to)[here + (dist)] |= (from)[here])
#define BACK(to, from, dist)	((to)[here - (dist)] |= (from)[here])
#define ISSETBACK(set, dist)	((set)[here - (dist)])
/* function names */
#define LNAMES			/* flag */
94
95 #include "engine.c"
96
97 /*
98 - regexec - interface for matching
99 = extern int regexec(const regex_t *, const char *, size_t, \
100 = regmatch_t [], int);
101 = #define MY_REG_NOTBOL 00001
102 = #define MY_REG_NOTEOL 00002
103 = #define MY_REG_STARTEND 00004
104 = #define MY_REG_TRACE 00400 // tracing of execution
105 = #define MY_REG_LARGE 01000 // force large representation
106 = #define MY_REG_BACKR 02000 // force use of backref code
107 *
108 * We put this here so we can exploit knowledge of the state representation
109 * when choosing which matcher to call. Also, by this point the matchers
110 * have been prototyped.
111 */
112
113 /**
114 my_regexec matches the compiled RE pointed to by preg against the
115 string, subject to the flags in eflags, and reports results using
116 nmatch, pmatch, and the returned value. The RE must have been
117 compiled by a previous invocation of my_regcomp.
118
119 By default, the NULL-terminated string pointed to by string is
120 considered to be the text of an entire line, minus any terminating
121 newline. The eflags argument is the bitwise OR of zero or more of
122 the following flags:
123
124 MY_REG_NOTBOL The first character of the string is not the beginning of
125 a line, so the `^' anchor should not match before it.
126 This does not affect the behavior of newlines under
127 MY_REG_NEWLINE.
128
129 MY_REG_NOTEOL The NULL terminating the string does not end a line, so the
130 `$' anchor should not match before it. This does not affect
131 the behavior of newlines under MY_REG_NEWLINE.
132
133 MY_REG_STARTEND The string is considered to start at string +
134 pmatch[0].rm_so and to have a terminating NUL located
135 at string + pmatch[0].rm_eo (there need not actually be
136 a NUL at that location), regardless of the value of nmatch.
137
138 @return 0 success, MY_REG_NOMATCH failure
139 */
140 int
my_regexec(preg,str,nmatch,pmatch,eflags)141 my_regexec(preg, str, nmatch, pmatch, eflags)
142 const my_regex_t *preg;
143 const char *str;
144 size_t nmatch;
145 my_regmatch_t pmatch[];
146 int eflags;
147 {
148 char *pstr = (char *) str;
149 struct re_guts *g = preg->re_g;
150 #ifdef REDEBUG
151 # define GOODFLAGS(f) (f)
152 #else
153 # define GOODFLAGS(f) ((f)&(MY_REG_NOTBOL|MY_REG_NOTEOL|MY_REG_STARTEND))
154 #endif
155
156 if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
157 return(MY_REG_BADPAT);
158 assert(!(g->iflags&BAD));
159 if (g->iflags&BAD) /* backstop for no-debug case */
160 return(MY_REG_BADPAT);
161 eflags = GOODFLAGS(eflags);
162
163 if ((size_t) g->nstates <= CHAR_BIT*sizeof(states1) &&
164 !(eflags&MY_REG_LARGE))
165 return(smatcher(preg->charset, g, pstr, nmatch, pmatch, eflags));
166 else
167 return(lmatcher(preg->charset, g, pstr, nmatch, pmatch, eflags));
168 }
169