1 /*
2  * Copyright (c) 2015-2020, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /** \file
30     This file provides the internal structures and definitions required for the
31     real NFAs (aka limex NFAs );
32 
33     Limex NFAs now have variable length in memory. They look like this:
34 
35         LimExNFA structure
36             Fixed length, e.g. LimExNFA256.
37         Reachability table
38             Variable length array of state bitvectors, mapped into by
39             NFACommonXXX.reachMap.
40         Tops
41             Variable length array of state bitvectors, used for TOP_N events.
42         Acceleration structures
43             Variable length array of AccelAux structs.
44         Accepts
45             Variable length array of NFAAccept structs.
46         EOD Accepts
47             Variable length array of NFAAccept structs.
48         Exceptions
49             Variable length array of NFAExceptionXXX structs.
50         Repeat Structure Offsets
51             Array of u32 offsets that point at each "Repeat Structure" (below)
52         Repeat Structures
53             Variable length repeat structures, addressed via
54             NFAException32::repeatOffset etc.
55 
56     The state associated with the NFA is split into:
57 
58     -# The "traditional" NFA state as a bitvector. This is stored in the
59        first N bytes of the state space (length given in
60        NFACommonXXX.stateSize), and may be stored shrunk to CEIL(stateSize/8)
61        or compressed. If it is stored compressed, than the
62        LIMEX_FLAG_COMPRESS_STATE flag is set in NFACommonXXX.flags.
63     -# Extended NFA state, only used in some LimEx NFAs. This consists of a
64        variable length array of LimExNFAExtendedState structures, each with
65        pointers to a packed list of mmbit structures that follows them. Only
66        present when used.
67 
68     The value of NFA.stateSize gives the total state size in bytes (the sum of
69     all the above).
70 
71     Number of shifts should be always greater or equal to 1
72     Number of shifts 0 means that no appropriate NFA engine was found.
73 
74 */
75 
76 #ifndef LIMEX_INTERNAL_H
77 #define LIMEX_INTERNAL_H
78 
79 #include "nfa_internal.h"
80 #include "repeat_internal.h"
81 
82 // Constants
83 #define MAX_SHIFT_COUNT 8   /**< largest number of shifts used by a LimEx NFA */
84 #define MAX_SHIFT_AMOUNT 16 /**< largest shift amount used by a LimEx NFA */
85 
86 #define LIMEX_FLAG_COMPRESS_STATE  1 /**< pack state into stream state */
87 #define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */
88 #define LIMEX_FLAG_CANNOT_DIE      4 /**< limex cannot have no states on */
89 #define LIMEX_FLAG_EXTRACT_EXP     8 /**< use limex exception bit extraction */
90 
91 enum LimExTrigger {
92     LIMEX_TRIGGER_NONE = 0,
93     LIMEX_TRIGGER_POS = 1,
94     LIMEX_TRIGGER_TUG = 2
95 };
96 
97 enum LimExSquash {
98     LIMEX_SQUASH_NONE = 0,   //!< no squash for you!
99     LIMEX_SQUASH_CYCLIC = 1, //!< squash due to cyclic state
100     LIMEX_SQUASH_TUG = 2,    //!< squash due to tug trigger with stale estate
101     LIMEX_SQUASH_REPORT = 3  //!< squash when report is raised
102 };
103 
104 /* uniform looking types for the macros */
105 typedef u8   u_8;
106 typedef u16  u_16;
107 typedef u32  u_32;
108 typedef u64a u_64;
109 typedef m128 u_128;
110 typedef m256 u_256;
111 typedef m384 u_384;
112 typedef m512 u_512;
113 
114 #define CREATE_NFA_LIMEX(size)                                              \
115 struct NFAException##size {                                                 \
116     u_##size squash; /**< mask of states to leave on */                     \
117     u_##size successors; /**< mask of states to switch on */                \
118     u32 reports; /**< offset to start of reports list, or MO_INVALID_IDX */ \
119     u32 repeatOffset; /**< offset to NFARepeatInfo, or MO_INVALID_IDX */    \
120     u8 hasSquash; /**< from enum LimExSquash */                             \
121     u8 trigger; /**< from enum LimExTrigger */                              \
122 };                                                                          \
123                                                                             \
124 struct LimExNFA##size {                                                     \
125     u8 reachMap[N_CHARS]; /**< map of char -> entry in reach[] */           \
126     u32 reachSize; /**< number of reach masks */                            \
127     u32 accelCount; /**< number of entries in accel table */                \
128     u32 accelTableOffset; /* rel. to start of LimExNFA */                   \
129     u32 accelAuxCount; /**< number of entries in aux table */               \
130     u32 accelAuxOffset; /* rel. to start of LimExNFA */                     \
131     u32 acceptCount;                                                        \
132     u32 acceptOffset; /* rel. to start of LimExNFA */                       \
133     u32 acceptEodCount;                                                     \
134     u32 acceptEodOffset; /* rel. to start of LimExNFA */                    \
135     u32 exceptionCount;                                                     \
136     u32 exceptionOffset; /* rel. to start of LimExNFA */                    \
137     u32 repeatCount;                                                        \
138     u32 repeatOffset;                                                       \
139     u32 squashOffset; /* rel. to start of LimExNFA; for accept squashing */ \
140     u32 squashCount;                                                        \
141     u32 topCount;                                                           \
142     u32 topOffset; /* rel. to start of LimExNFA */                          \
143     u32 stateSize; /**< not including extended history */                   \
144     u32 flags;                                                              \
145     u_##size init;                                                          \
146     u_##size initDS;                                                        \
147     u_##size accept; /**< mask of accept states */                          \
148     u_##size acceptAtEOD; /**< mask of states that accept at EOD */         \
149     u_##size accel; /**< mask of accelerable states */                      \
150     u_##size accelPermute; /**< pshufb permute mask (not GPR) */            \
151     u_##size accelCompare; /**< pshufb compare mask (not GPR) */            \
152     u_##size accel_and_friends; /**< mask of accelerable states + likely
153                                     *  followers */                         \
154     u_##size compressMask; /**< switch off before compress */               \
155     u_##size exceptionMask;                                                 \
156     u_##size repeatCyclicMask; /**< also includes tug states */             \
157     u_##size zombieMask; /**< zombie if in any of the set states */         \
158     u_##size shift[MAX_SHIFT_COUNT];                                        \
159     u32 shiftCount; /**< number of shift masks used */                      \
160     u8 shiftAmount[MAX_SHIFT_COUNT]; /**< shift amount for each mask */     \
161     m512 exceptionShufMask; /**< exception byte shuffle mask  */            \
162     m512 exceptionBitMask; /**< exception bit mask */                       \
163     m512 exceptionAndMask; /**< exception and mask */                       \
164 };
165 
166 CREATE_NFA_LIMEX(32)
167 CREATE_NFA_LIMEX(64)
168 CREATE_NFA_LIMEX(128)
169 CREATE_NFA_LIMEX(256)
170 CREATE_NFA_LIMEX(384)
171 CREATE_NFA_LIMEX(512)
172 
173 /** \brief Structure describing a bounded repeat within the LimEx NFA.
174  *
175  * This struct is followed in memory by:
176  *
177  * -# a RepeatInfo structure
178  * -# a variable-sized lookup table for REPEAT_SPARSE_OPTIMAL_P repeats
179  * -# a TUG mask
180  */
181 struct NFARepeatInfo {
182     u32 cyclicState;      //!< index of this repeat's cyclic state
183     u32 ctrlIndex;        //!< index of this repeat's control block
184     u32 packedCtrlOffset; //!< offset to packed control block in stream state
185     u32 stateOffset;      //!< offset to repeat state in stream state
186     u32 stateSize;        //!< total size of packed stream state for this repeat
187     u32 tugMaskOffset;    //!< offset to tug mask (rel. to NFARepeatInfo)
188 };
189 
190 struct NFAAccept {
191     u8 single_report; //!< If true, 'reports' is report id.
192 
193     /**
194      * \brief If single report is true, this is the report id to fire.
195      * Otherwise, it is the offset (relative to the start of the LimExNFA
196      * structure) of a list of reports, terminated with MO_INVALID_IDX.
197      */
198     u32 reports;
199 
200     u32 squash;  //!< Offset (from LimEx) into squash masks, or MO_INVALID_IDX.
201 };
202 
203 #endif
204