1 /*
2  * Copyright (c) 2015, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #ifndef REPEAT_INTERNAL_H
30 #define REPEAT_INTERNAL_H
31 
32 #include "ue2common.h"
33 
34 /** \file
35  *   \brief Bounded Repeat models.
36  *
37  *  Used by the NFA, to represent bounded repeats managed via special POS and
38  *  TUG exceptions, and by the LBR (limited bounded repeat) and Castle
39  *  specialist engines.
40  *
41  *  We currently have a number of different kinds of bounded repeat model, for
42  *  different kinds of {N,M} repeats, described by ::RepeatType.
43  */
44 
45 /** Different types of bounded repeats. */
46 enum RepeatType {
47     /** General mechanism for tracking {N,M} repeats. Stores the first top as
48      * an absolute offset, then subsequent tops in the {N,M} range as a ring of
49      * relative top indices stored in a multibit. */
50     REPEAT_RING,
51 
52     /** Used to track {N,} repeats. Uses the \ref RepeatOffsetControl structure,
53      * since only the first top encountered needs to be stored. */
54     REPEAT_FIRST,
55 
56     /** Used to track {0,N} repeats. Much like ::REPEAT_FIRST, except that we
57      * store the most recent top encountered. */
58     REPEAT_LAST,
59 
60     /** Like ::REPEAT_RING, this is also used for {N,M} repeats, but for cases
61      * where there is a large difference between N and M, and developed to
62      * reduce the state requirements of this case (relative to the RING model).
63      * Uses a small ordered array of top indices relative to \ref
64      * RepeatRangeControl::offset. */
65     REPEAT_RANGE,
66 
67     /** Used for {N,M} repeats where 0 < M <= 64. Uses the \ref
68      * RepeatBitmapControl structure at runtime. */
69     REPEAT_BITMAP,
70 
71     /** Optimal mechanism for tracking {N,M} repeats when there is a bound on
72      * how frequently they can be retriggered.
73      * Assume f(repeat, min) representing the number of possible bit patterns
74      * we can have for repeat size = repeat,  minimum period = min
75      * We will have the following recurrence relation:
76      * f(repeat, min) = f(repeat - 1, min) + f(repeat - min, min);
77      * We use this recurrence to encode bit patterns with 64-bit values by
78      * referencing a table that stores values from f(0, min) to f(repeat, min)
79      * eg: repeat = 5, min = 2.  10001 => f(4,2) + f(0,2) = 9.
80      * We search the optimal patch size between min and repeat in advance and
81      * use the scheme above to do encoding and decoding to reduce stream state
82      * size. */
83     REPEAT_SPARSE_OPTIMAL_P,
84 
85     /** Used for {N,M} repeats where 0 < N < 64. Uses the
86      * \ref RepeatTrailerControl structure at runtime. */
87     REPEAT_TRAILER,
88 
89     /** Degenerate repeat that always returns true. Used by castle for pseudo
90      * [^X]* repeats. */
91     REPEAT_ALWAYS,
92 };
93 
94 /**
95  * \brief Value used to represent an unbounded max repeat.
96  *
97  * Note that we do not support \ref RepeatInfo::repeatMax values larger than
98  * this.
99  */
100 #define REPEAT_INF 65535
101 
102 /** Max slots used by ::REPEAT_RANGE repeat model. */
103 #define REPEAT_RANGE_MAX_SLOTS 16
104 
105 /** Structure describing a bounded repeat in the bytecode */
106 struct RepeatInfo {
107     u8 type; //!< from enum RepeatType.
108     u32 repeatMin; //!< minimum number of repeats.
109     u32 repeatMax; //!< maximum number of repeats, or REPEAT_INF if unbounded.
110 
111     /** Maximum value that is required to be stored in the control block
112      * counters. Any value greater than this will be capped at the horizon.
113      */
114     u32 horizon;
115 
116     /** Size of the compressed control block in bytes. This is what is written
117      * out to stream state at stream boundaries. */
118     u32 packedCtrlSize;
119 
120     /** Size of the repeat state block in bytes. This is where the REPEAT_RANGE
121      * vector and REPEAT_RING multibit are stored, in stream state, and they
122      * are manipulated directly (i.e. not copied at stream boundaries). */
123     u32 stateSize;
124 
125     /** How soon after one trigger we can see the next trigger.
126      * Used by REPEAT_SPARSE_OPTIMAL_P. */
127     u32 minPeriod;
128 
129     /** Packed control block field sizes (in bits), used by REPEAT_TRAILER. */
130     u32 packedFieldSizes[2];
131 
132     /* Number of patches, used by REPEAT_SPARSE_OPTIMAL_P. */
133     u32 patchCount;
134 
135     /* Optimal patch length, used by REPEAT_SPARSE_OPTIMAL_P. */
136     u32 patchSize;
137 
138     /* Encoding patch length in bytes, used by REPEAT_SPARSE_OPTIMAL_P. */
139     u32 encodingSize;
140 
141     /* RepeatInfo struct length including table size. */
142     u32 length;
143 
144     /** Offset of patches relative to the start of repeat stream state,
145      * used by REPEAT_SPARSE_OPTIMAL_P. */
146     u32 patchesOffset;
147 };
148 
149 /** Runtime control block structure for ::REPEAT_RING and
150  * ::REPEAT_SPARSE_OPTIMAL_P bounded repeats. Note that this struct is packed
151  * (may not be aligned). */
152 struct RepeatRingControl {
153     u64a offset; //!< index of first top.
154     u16 first; //!< start index in ring.
155     u16 last; //!< end index in ring.
156 };
157 
158 /** Runtime control block structure for ::REPEAT_RANGE bounded repeats. Note
159  * that this struct is packed (may not be aligned). */
160 struct RepeatRangeControl {
161     u64a offset; //!< index of first top.
162     u8 num; //!< number of elements in array.
163 };
164 
165 /** Runtime control block structure for cases where only a single offset is
166  * needed to track the repeat, both ::REPEAT_FIRST and ::REPEAT_LAST. Note that
167  * this struct is packed (may not be aligned). */
168 struct RepeatOffsetControl {
169     u64a offset; //!< index of a top.
170 };
171 
172 /** Runtime control block structure for ::REPEAT_BITMAP bounded repeats. */
173 struct RepeatBitmapControl {
174     u64a offset; //!< index of first top.
175     u64a bitmap; //!< forward bitmap of tops relative to base offset.
176 };
177 
178 /** Runtime control block structure for ::REPEAT_TRAILER bounded repeats. */
179 struct RepeatTrailerControl {
180     u64a offset; //!< min extent of most recent match window.
181     u64a bitmap; //!< trailing bitmap of earlier matches, relative to offset.
182 };
183 
184 /** \brief Union of control block types, used at runtime. */
185 union RepeatControl {
186     struct RepeatRingControl ring;
187     struct RepeatRangeControl range;
188     struct RepeatOffsetControl offset;
189     struct RepeatBitmapControl bitmap;
190     struct RepeatTrailerControl trailer;
191 };
192 
193 /** For debugging, returns the name of a repeat model. */
194 static really_inline UNUSED
repeatTypeName(u8 type)195 const char *repeatTypeName(u8 type) {
196     switch ((enum RepeatType)type) {
197     case REPEAT_RING:
198         return "RING";
199     case REPEAT_FIRST:
200         return "FIRST";
201     case REPEAT_LAST:
202         return "LAST";
203     case REPEAT_RANGE:
204         return "RANGE";
205     case REPEAT_BITMAP:
206         return "BITMAP";
207     case REPEAT_SPARSE_OPTIMAL_P:
208         return "SPARSE_OPTIMAL_P";
209     case REPEAT_TRAILER:
210         return "TRAILER";
211     case REPEAT_ALWAYS:
212         return "ALWAYS";
213     }
214     assert(0);
215     return "UNKNOWN";
216 }
217 
218 #endif // REPEAT_INTERNAL_H
219