1 /*
2  * Copyright (c) 2015-2020, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /** \file
30     \brief Declarations for the main NFA engine types and structures.
31 */
32 #ifndef NFA_INTERNAL_H
33 #define NFA_INTERNAL_H
34 
35 #ifdef __cplusplus
36 extern "C"
37 {
38 #endif
39 
40 #include "ue2common.h"
41 
42 // Constants
43 
44 #define MO_INVALID_IDX  0xffffffff /**< index meaning value is invalid */
45 
46 // Flags (used in NFA::flags)
47 
48 #define NFA_ACCEPTS_EOD 1U     /**< can produce matches on EOD. */
49 #define NFA_ZOMBIE      2U     /**< supports zombies */
50 
51 // Common data structures for NFAs
52 
53 enum NFAEngineType {
54     LIMEX_NFA_32,
55     LIMEX_NFA_64,
56     LIMEX_NFA_128,
57     LIMEX_NFA_256,
58     LIMEX_NFA_384,
59     LIMEX_NFA_512,
60     MCCLELLAN_NFA_8,    /**< magic pseudo nfa */
61     MCCLELLAN_NFA_16,   /**< magic pseudo nfa */
62     GOUGH_NFA_8,        /**< magic pseudo nfa */
63     GOUGH_NFA_16,       /**< magic pseudo nfa */
64     MPV_NFA,            /**< magic pseudo nfa */
65     LBR_NFA_DOT,        /**< magic pseudo nfa */
66     LBR_NFA_VERM,       /**< magic pseudo nfa */
67     LBR_NFA_NVERM,      /**< magic pseudo nfa */
68     LBR_NFA_SHUF,       /**< magic pseudo nfa */
69     LBR_NFA_TRUF,       /**< magic pseudo nfa */
70     CASTLE_NFA,         /**< magic pseudo nfa */
71     SHENG_NFA,          /**< magic pseudo nfa */
72     TAMARAMA_NFA,       /**< magic nfa container */
73     MCSHENG_NFA_8,      /**< magic pseudo nfa */
74     MCSHENG_NFA_16,     /**< magic pseudo nfa */
75     SHENG_NFA_32,       /**< magic pseudo nfa */
76     SHENG_NFA_64,       /**< magic pseudo nfa */
77     MCSHENG_64_NFA_8,   /**< magic pseudo nfa */
78     MCSHENG_64_NFA_16,  /**< magic pseudo nfa */
79     /** \brief bogus NFA - not used */
80     INVALID_NFA
81 };
82 
83 /** \brief header for the NFA implementation. */
84 struct ALIGN_CL_DIRECTIVE NFA {
85     u32 flags;
86 
87     /** \brief The size in bytes of the NFA engine. The engine is
88      * serialized to the extent that copying length bytes back into a
89      * 16-byte aligned memory location yields a structure that has the same
90      * behaviour as the original engine. */
91     u32 length;
92 
93     /** \brief Active implementation used by this NFAEngineType */
94     u8 type;
95 
96     u8 rAccelType;
97     u8 rAccelOffset;
98     u8 maxBiAnchoredWidth; /**< if non zero, max width of the block */
99 
100     union {
101         u8 c;
102         u16 dc;
103         u8 array[2];
104     } rAccelData;
105 
106     u32 queueIndex; /**< index of the associated queue in scratch */
107 
108     /** \brief The number of valid positions/states for this NFA. Debug only */
109     u32 nPositions;
110 
111     /** \brief Size of the state required in scratch space.
112      *
113      * This state has less strict size requirements (as it doesn't go in stream
114      * state) and does not persist between stream writes.
115      */
116     u32 scratchStateSize;
117 
118     /** \brief Size of the state required in stream state.
119      *
120      * This encompasses all state stored by the engine that must persist between
121      * stream writes. */
122     u32 streamStateSize;
123 
124     u32 maxWidth; /**< longest possible match in this NFA, 0 if unbounded */
125     u32 minWidth; /**< minimum bytes required to match this NFA */
126     u32 maxOffset; /**< non zero: maximum offset this pattern can match at */
127 
128     /* Note: implementation (e.g. a LimEx) directly follows struct in memory */
129 } ;
130 
131 // Accessor macro for the implementation NFA: we do things this way to avoid
132 // type-punning warnings.
133 #define getImplNfa(nfa) \
134     ((const void *)((const char *)(nfa) + sizeof(struct NFA)))
135 
136 // Non-const version of the above, used at compile time.
137 #define getMutableImplNfa(nfa)     ((char *)(nfa) + sizeof(struct NFA))
138 
nfaAcceptsEod(const struct NFA * nfa)139 static really_inline u32 nfaAcceptsEod(const struct NFA *nfa) {
140     return nfa->flags & NFA_ACCEPTS_EOD;
141 }
142 
nfaSupportsZombie(const struct NFA * nfa)143 static really_inline u32 nfaSupportsZombie(const struct NFA *nfa) {
144     return nfa->flags & NFA_ZOMBIE;
145 }
146 
147 /** \brief True if the given type (from NFA::type) is a McClellan DFA. */
isMcClellanType(u8 t)148 static really_inline int isMcClellanType(u8 t) {
149     return t == MCCLELLAN_NFA_8 || t == MCCLELLAN_NFA_16;
150 }
151 
152 /** \brief True if the given type (from NFA::type) is a Sheng-McClellan hybrid
153  * DFA. */
isShengMcClellanType(u8 t)154 static really_inline int isShengMcClellanType(u8 t) {
155     return t == MCSHENG_NFA_8 || t == MCSHENG_NFA_16 ||
156            t == MCSHENG_64_NFA_8 || t == MCSHENG_64_NFA_16;
157 }
158 
159 /** \brief True if the given type (from NFA::type) is a Gough DFA. */
isGoughType(u8 t)160 static really_inline int isGoughType(u8 t) {
161     return t == GOUGH_NFA_8 || t == GOUGH_NFA_16;
162 }
163 
164 /** \brief True if the given type (from NFA::type) is a Sheng DFA. */
isSheng16Type(u8 t)165 static really_inline int isSheng16Type(u8 t) {
166     return t == SHENG_NFA;
167 }
168 
169 /** \brief True if the given type (from NFA::type) is a Sheng32 DFA. */
isSheng32Type(u8 t)170 static really_inline int isSheng32Type(u8 t) {
171     return t == SHENG_NFA_32;
172 }
173 
174 /** \brief True if the given type (from NFA::type) is a Sheng64 DFA. */
isSheng64Type(u8 t)175 static really_inline int isSheng64Type(u8 t) {
176     return t == SHENG_NFA_64;
177 }
178 
179 /** \brief True if the given type (from NFA::type) is a Sheng16/32/64 DFA. */
isShengType(u8 t)180 static really_inline int isShengType(u8 t) {
181     return t == SHENG_NFA || t == SHENG_NFA_32 || t == SHENG_NFA_64;
182 }
183 
184 /**
185  * \brief True if the given type (from NFA::type) is a McClellan, Gough or
186  * Sheng DFA.
187  */
isDfaType(u8 t)188 static really_inline int isDfaType(u8 t) {
189     return isMcClellanType(t) || isGoughType(t) || isShengType(t)
190         || isShengMcClellanType(t);
191 }
192 
isBigDfaType(u8 t)193 static really_inline int isBigDfaType(u8 t) {
194     return t == MCCLELLAN_NFA_16 || t == MCSHENG_NFA_16 || t == GOUGH_NFA_16;
195 }
196 
isSmallDfaType(u8 t)197 static really_inline int isSmallDfaType(u8 t) {
198     return isDfaType(t) && !isBigDfaType(t);
199 }
200 
201 /** \brief True if the given type (from NFA::type) is an NFA. */
isNfaType(u8 t)202 static really_inline int isNfaType(u8 t) {
203     switch (t) {
204     case LIMEX_NFA_32:
205     case LIMEX_NFA_64:
206     case LIMEX_NFA_128:
207     case LIMEX_NFA_256:
208     case LIMEX_NFA_384:
209     case LIMEX_NFA_512:
210         return 1;
211     default:
212         break;
213     }
214     return 0;
215 }
216 
217 /** \brief True if the given type (from NFA::type) is an LBR. */
218 static really_inline
isLbrType(u8 t)219 int isLbrType(u8 t) {
220     return t == LBR_NFA_DOT || t == LBR_NFA_VERM || t == LBR_NFA_NVERM ||
221            t == LBR_NFA_SHUF || t == LBR_NFA_TRUF;
222 }
223 
224 /** \brief True if the given type (from NFA::type) is a container engine. */
225 static really_inline
isContainerType(u8 t)226 int isContainerType(u8 t) {
227     return t == TAMARAMA_NFA;
228 }
229 
230 static really_inline
isMultiTopType(u8 t)231 int isMultiTopType(u8 t) {
232     return !isDfaType(t) && !isLbrType(t);
233 }
234 
235 /** Macros used in place of unimplemented NFA API functions for a given
236  * engine. */
237 #if !defined(_WIN32)
238 
239 /* Use for functions that return an integer. */
240 #define NFA_API_NO_IMPL(...)                                                   \
241     ({                                                                         \
242         assert(!"not implemented for this engine!");                           \
243         0; /* return value, for places that need it */                         \
244     })
245 
246 /* Use for _zombie_status functions. */
247 #define NFA_API_ZOMBIE_NO_IMPL(...)                                            \
248     ({                                                                         \
249         assert(!"not implemented for this engine!");                           \
250         NFA_ZOMBIE_NO;                                                         \
251     })
252 
253 #else
254 
255 /* Simpler implementation for compilers that don't like the GCC extension used
256  * above. */
257 #define NFA_API_NO_IMPL(...)        0
258 #define NFA_API_ZOMBIE_NO_IMPL(...) NFA_ZOMBIE_NO
259 
260 #endif
261 
262 #ifdef __cplusplus
263 }
264 #endif
265 
266 #endif
267