1 /*
2 * Copyright (c) 2015-2020, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /** \file
30 \brief Declarations for the main NFA engine types and structures.
31 */
32 #ifndef NFA_INTERNAL_H
33 #define NFA_INTERNAL_H
34
35 #ifdef __cplusplus
36 extern "C"
37 {
38 #endif
39
40 #include "ue2common.h"
41
42 // Constants
43
44 #define MO_INVALID_IDX 0xffffffff /**< index meaning value is invalid */
45
46 // Flags (used in NFA::flags)
47
48 #define NFA_ACCEPTS_EOD 1U /**< can produce matches on EOD. */
49 #define NFA_ZOMBIE 2U /**< supports zombies */
50
51 // Common data structures for NFAs
52
53 enum NFAEngineType {
54 LIMEX_NFA_32,
55 LIMEX_NFA_64,
56 LIMEX_NFA_128,
57 LIMEX_NFA_256,
58 LIMEX_NFA_384,
59 LIMEX_NFA_512,
60 MCCLELLAN_NFA_8, /**< magic pseudo nfa */
61 MCCLELLAN_NFA_16, /**< magic pseudo nfa */
62 GOUGH_NFA_8, /**< magic pseudo nfa */
63 GOUGH_NFA_16, /**< magic pseudo nfa */
64 MPV_NFA, /**< magic pseudo nfa */
65 LBR_NFA_DOT, /**< magic pseudo nfa */
66 LBR_NFA_VERM, /**< magic pseudo nfa */
67 LBR_NFA_NVERM, /**< magic pseudo nfa */
68 LBR_NFA_SHUF, /**< magic pseudo nfa */
69 LBR_NFA_TRUF, /**< magic pseudo nfa */
70 CASTLE_NFA, /**< magic pseudo nfa */
71 SHENG_NFA, /**< magic pseudo nfa */
72 TAMARAMA_NFA, /**< magic nfa container */
73 MCSHENG_NFA_8, /**< magic pseudo nfa */
74 MCSHENG_NFA_16, /**< magic pseudo nfa */
75 SHENG_NFA_32, /**< magic pseudo nfa */
76 SHENG_NFA_64, /**< magic pseudo nfa */
77 MCSHENG_64_NFA_8, /**< magic pseudo nfa */
78 MCSHENG_64_NFA_16, /**< magic pseudo nfa */
79 /** \brief bogus NFA - not used */
80 INVALID_NFA
81 };
82
83 /** \brief header for the NFA implementation. */
84 struct ALIGN_CL_DIRECTIVE NFA {
85 u32 flags;
86
87 /** \brief The size in bytes of the NFA engine. The engine is
88 * serialized to the extent that copying length bytes back into a
89 * 16-byte aligned memory location yields a structure that has the same
90 * behaviour as the original engine. */
91 u32 length;
92
93 /** \brief Active implementation used by this NFAEngineType */
94 u8 type;
95
96 u8 rAccelType;
97 u8 rAccelOffset;
98 u8 maxBiAnchoredWidth; /**< if non zero, max width of the block */
99
100 union {
101 u8 c;
102 u16 dc;
103 u8 array[2];
104 } rAccelData;
105
106 u32 queueIndex; /**< index of the associated queue in scratch */
107
108 /** \brief The number of valid positions/states for this NFA. Debug only */
109 u32 nPositions;
110
111 /** \brief Size of the state required in scratch space.
112 *
113 * This state has less strict size requirements (as it doesn't go in stream
114 * state) and does not persist between stream writes.
115 */
116 u32 scratchStateSize;
117
118 /** \brief Size of the state required in stream state.
119 *
120 * This encompasses all state stored by the engine that must persist between
121 * stream writes. */
122 u32 streamStateSize;
123
124 u32 maxWidth; /**< longest possible match in this NFA, 0 if unbounded */
125 u32 minWidth; /**< minimum bytes required to match this NFA */
126 u32 maxOffset; /**< non zero: maximum offset this pattern can match at */
127
128 /* Note: implementation (e.g. a LimEx) directly follows struct in memory */
129 } ;
130
131 // Accessor macro for the implementation NFA: we do things this way to avoid
132 // type-punning warnings.
133 #define getImplNfa(nfa) \
134 ((const void *)((const char *)(nfa) + sizeof(struct NFA)))
135
136 // Non-const version of the above, used at compile time.
137 #define getMutableImplNfa(nfa) ((char *)(nfa) + sizeof(struct NFA))
138
nfaAcceptsEod(const struct NFA * nfa)139 static really_inline u32 nfaAcceptsEod(const struct NFA *nfa) {
140 return nfa->flags & NFA_ACCEPTS_EOD;
141 }
142
nfaSupportsZombie(const struct NFA * nfa)143 static really_inline u32 nfaSupportsZombie(const struct NFA *nfa) {
144 return nfa->flags & NFA_ZOMBIE;
145 }
146
147 /** \brief True if the given type (from NFA::type) is a McClellan DFA. */
isMcClellanType(u8 t)148 static really_inline int isMcClellanType(u8 t) {
149 return t == MCCLELLAN_NFA_8 || t == MCCLELLAN_NFA_16;
150 }
151
152 /** \brief True if the given type (from NFA::type) is a Sheng-McClellan hybrid
153 * DFA. */
isShengMcClellanType(u8 t)154 static really_inline int isShengMcClellanType(u8 t) {
155 return t == MCSHENG_NFA_8 || t == MCSHENG_NFA_16 ||
156 t == MCSHENG_64_NFA_8 || t == MCSHENG_64_NFA_16;
157 }
158
159 /** \brief True if the given type (from NFA::type) is a Gough DFA. */
isGoughType(u8 t)160 static really_inline int isGoughType(u8 t) {
161 return t == GOUGH_NFA_8 || t == GOUGH_NFA_16;
162 }
163
164 /** \brief True if the given type (from NFA::type) is a Sheng DFA. */
isSheng16Type(u8 t)165 static really_inline int isSheng16Type(u8 t) {
166 return t == SHENG_NFA;
167 }
168
169 /** \brief True if the given type (from NFA::type) is a Sheng32 DFA. */
isSheng32Type(u8 t)170 static really_inline int isSheng32Type(u8 t) {
171 return t == SHENG_NFA_32;
172 }
173
174 /** \brief True if the given type (from NFA::type) is a Sheng64 DFA. */
isSheng64Type(u8 t)175 static really_inline int isSheng64Type(u8 t) {
176 return t == SHENG_NFA_64;
177 }
178
179 /** \brief True if the given type (from NFA::type) is a Sheng16/32/64 DFA. */
isShengType(u8 t)180 static really_inline int isShengType(u8 t) {
181 return t == SHENG_NFA || t == SHENG_NFA_32 || t == SHENG_NFA_64;
182 }
183
184 /**
185 * \brief True if the given type (from NFA::type) is a McClellan, Gough or
186 * Sheng DFA.
187 */
isDfaType(u8 t)188 static really_inline int isDfaType(u8 t) {
189 return isMcClellanType(t) || isGoughType(t) || isShengType(t)
190 || isShengMcClellanType(t);
191 }
192
isBigDfaType(u8 t)193 static really_inline int isBigDfaType(u8 t) {
194 return t == MCCLELLAN_NFA_16 || t == MCSHENG_NFA_16 || t == GOUGH_NFA_16;
195 }
196
isSmallDfaType(u8 t)197 static really_inline int isSmallDfaType(u8 t) {
198 return isDfaType(t) && !isBigDfaType(t);
199 }
200
201 /** \brief True if the given type (from NFA::type) is an NFA. */
isNfaType(u8 t)202 static really_inline int isNfaType(u8 t) {
203 switch (t) {
204 case LIMEX_NFA_32:
205 case LIMEX_NFA_64:
206 case LIMEX_NFA_128:
207 case LIMEX_NFA_256:
208 case LIMEX_NFA_384:
209 case LIMEX_NFA_512:
210 return 1;
211 default:
212 break;
213 }
214 return 0;
215 }
216
217 /** \brief True if the given type (from NFA::type) is an LBR. */
218 static really_inline
isLbrType(u8 t)219 int isLbrType(u8 t) {
220 return t == LBR_NFA_DOT || t == LBR_NFA_VERM || t == LBR_NFA_NVERM ||
221 t == LBR_NFA_SHUF || t == LBR_NFA_TRUF;
222 }
223
224 /** \brief True if the given type (from NFA::type) is a container engine. */
225 static really_inline
isContainerType(u8 t)226 int isContainerType(u8 t) {
227 return t == TAMARAMA_NFA;
228 }
229
230 static really_inline
isMultiTopType(u8 t)231 int isMultiTopType(u8 t) {
232 return !isDfaType(t) && !isLbrType(t);
233 }
234
235 /** Macros used in place of unimplemented NFA API functions for a given
236 * engine. */
237 #if !defined(_WIN32)
238
239 /* Use for functions that return an integer. */
240 #define NFA_API_NO_IMPL(...) \
241 ({ \
242 assert(!"not implemented for this engine!"); \
243 0; /* return value, for places that need it */ \
244 })
245
246 /* Use for _zombie_status functions. */
247 #define NFA_API_ZOMBIE_NO_IMPL(...) \
248 ({ \
249 assert(!"not implemented for this engine!"); \
250 NFA_ZOMBIE_NO; \
251 })
252
253 #else
254
255 /* Simpler implementation for compilers that don't like the GCC extension used
256 * above. */
257 #define NFA_API_NO_IMPL(...) 0
258 #define NFA_API_ZOMBIE_NO_IMPL(...) NFA_ZOMBIE_NO
259
260 #endif
261
262 #ifdef __cplusplus
263 }
264 #endif
265
266 #endif
267