1 /*
2  * Copyright (c) 2015-2017, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /** \file
30  * \brief Large Bounded Repeat (LBR) engine: runtime code.
31  */
32 #include "lbr.h"
33 
34 #include "lbr_internal.h"
35 #include "nfa_api.h"
36 #include "nfa_api_queue.h"
37 #include "nfa_internal.h"
38 #include "repeat.h"
39 #include "repeat_internal.h"
40 #include "shufti.h"
41 #include "truffle.h"
42 #include "vermicelli.h"
43 #include "util/partial_store.h"
44 #include "util/unaligned.h"
45 
46 /** \brief Sentinel value used to indicate that a repeat is dead/empty/unused.
47  *  * */
48 #define REPEAT_DEAD 0xffffffffffffffffull
49 
50 enum MatchMode {
51     CALLBACK_OUTPUT,
52     STOP_AT_MATCH,
53 };
54 
55 static really_inline
getRepeatInfo(const struct lbr_common * l)56 const struct RepeatInfo *getRepeatInfo(const struct lbr_common *l) {
57     const struct RepeatInfo *repeatInfo =
58         (const struct RepeatInfo *)((const char *)l + l->repeatInfoOffset);
59     return repeatInfo;
60 }
61 
62 static really_inline
lbrCompressState(const struct lbr_common * l,u64a offset,const struct lbr_state * lstate,char * stream_state)63 void lbrCompressState(const struct lbr_common *l, u64a offset,
64                       const struct lbr_state *lstate, char *stream_state) {
65     assert(l && lstate && stream_state);
66     assert(ISALIGNED(lstate));
67 
68     const struct RepeatInfo *info = getRepeatInfo(l);
69     repeatPack(stream_state, info, &lstate->ctrl, offset);
70 }
71 
72 static really_inline
lbrExpandState(const struct lbr_common * l,u64a offset,const char * stream_state,struct lbr_state * lstate)73 void lbrExpandState(const struct lbr_common *l, u64a offset,
74                     const char *stream_state, struct lbr_state *lstate) {
75     assert(l && stream_state && lstate);
76     assert(ISALIGNED(lstate));
77 
78     const struct RepeatInfo *info = getRepeatInfo(l);
79     repeatUnpack(stream_state, info, offset, &lstate->ctrl);
80     lstate->lastEscape = 0;
81 }
82 
83 static really_inline
clearRepeat(const struct RepeatInfo * info,struct lbr_state * lstate)84 void clearRepeat(const struct RepeatInfo *info, struct lbr_state *lstate) {
85     assert(info && lstate);
86 
87     DEBUG_PRINTF("clear repeat at %p\n", lstate);
88 
89     switch ((enum RepeatType)info->type) {
90     case REPEAT_RING:
91         lstate->ctrl.ring.offset = REPEAT_DEAD;
92         break;
93     case REPEAT_RANGE:
94         lstate->ctrl.range.offset = REPEAT_DEAD;
95         break;
96     case REPEAT_FIRST:
97     case REPEAT_LAST:
98         lstate->ctrl.offset.offset = REPEAT_DEAD;
99         break;
100     case REPEAT_BITMAP:
101         lstate->ctrl.bitmap.offset = REPEAT_DEAD;
102         break;
103     case REPEAT_SPARSE_OPTIMAL_P:
104         lstate->ctrl.ring.offset = REPEAT_DEAD;
105         break;
106     case REPEAT_TRAILER:
107         lstate->ctrl.trailer.offset = REPEAT_DEAD;
108         break;
109     default:
110         assert(0);
111         break;
112     }
113 }
114 
115 static really_inline
repeatIsDead(const struct RepeatInfo * info,const struct lbr_state * lstate)116 char repeatIsDead(const struct RepeatInfo *info,
117                   const struct lbr_state *lstate) {
118     assert(info && lstate);
119 
120     switch ((enum RepeatType)info->type) {
121     case REPEAT_RING:
122         return lstate->ctrl.ring.offset == REPEAT_DEAD;
123     case REPEAT_RANGE:
124         return lstate->ctrl.range.offset == REPEAT_DEAD;
125     case REPEAT_FIRST:
126     case REPEAT_LAST:
127         return lstate->ctrl.offset.offset == REPEAT_DEAD;
128     case REPEAT_BITMAP:
129         return lstate->ctrl.bitmap.offset == REPEAT_DEAD;
130     case REPEAT_SPARSE_OPTIMAL_P:
131         return lstate->ctrl.ring.offset == REPEAT_DEAD;
132     case REPEAT_TRAILER:
133         return lstate->ctrl.trailer.offset == REPEAT_DEAD;
134     case REPEAT_ALWAYS:
135         assert(!"REPEAT_ALWAYS should only be used by Castle");
136         return 0;
137     }
138 
139     assert(0);
140     return 1;
141 }
142 
143 /** Returns true if the LBR can produce matches at offsets greater than the
144  * given one. TODO: can this be combined with lbrIsActive? */
145 static really_inline
lbrIsAlive(const struct lbr_common * l,const struct lbr_state * lstate,const char * state,u64a offset)146 char lbrIsAlive(const struct lbr_common *l, const struct lbr_state *lstate,
147                 const char *state, u64a offset) {
148     assert(l && lstate && state);
149 
150     const struct RepeatInfo *info = getRepeatInfo(l);
151     if (repeatIsDead(info, lstate)) {
152         DEBUG_PRINTF("repeat is dead\n");
153         return 0;
154     }
155 
156     if (info->repeatMax == REPEAT_INF) {
157         DEBUG_PRINTF("active repeat with inf max bound, alive\n");
158         return 1;
159     }
160 
161     assert(info->repeatMax < REPEAT_INF);
162     const char *repeatState = state + info->packedCtrlSize;
163     u64a lastTop = repeatLastTop(info, &lstate->ctrl, repeatState);
164     if (offset < lastTop + info->repeatMax) {
165         DEBUG_PRINTF("alive, as we can still produce matches after %llu\n",
166                      offset);
167         return 1;
168     }
169 
170     DEBUG_PRINTF("dead\n");
171     return 0;
172 }
173 
174 /** Returns true if the LBR is matching at the given offset or it could produce
175  * a match in the future. */
176 static really_inline
lbrIsActive(const struct lbr_common * l,const struct lbr_state * lstate,const char * state,u64a offset)177 char lbrIsActive(const struct lbr_common *l, const struct lbr_state *lstate,
178                  const char *state, u64a offset) {
179     assert(l && lstate && state);
180     const struct RepeatInfo *info = getRepeatInfo(l);
181     assert(!repeatIsDead(info, lstate)); // Guaranteed by caller.
182 
183     const char *repeatState = state + info->packedCtrlSize;
184     if (repeatHasMatch(info, &lstate->ctrl, repeatState, offset) ==
185         REPEAT_MATCH) {
186         DEBUG_PRINTF("currently matching\n");
187         return 1;
188     }
189 
190     u64a i = repeatNextMatch(info, &lstate->ctrl, repeatState, offset);
191     if (i != 0) {
192         DEBUG_PRINTF("active, next match is at %llu\n", i);
193         return 1;
194     }
195 
196     DEBUG_PRINTF("no more matches\n");
197     return 0;
198 }
199 
200 static really_inline
lbrTop(const struct lbr_common * l,struct lbr_state * lstate,char * state,u64a offset)201 void lbrTop(const struct lbr_common *l, struct lbr_state *lstate, char *state,
202             u64a offset) {
203     assert(l && lstate && state);
204     DEBUG_PRINTF("top at %llu\n", offset);
205 
206     const struct RepeatInfo *info = getRepeatInfo(l);
207     char *repeatState = state + info->packedCtrlSize;
208 
209     char is_alive = !repeatIsDead(info, lstate);
210     if (is_alive) {
211         // Ignore duplicate TOPs.
212         u64a last = repeatLastTop(info, &lstate->ctrl, repeatState);
213         assert(last <= offset);
214         if (last == offset) {
215             return;
216         }
217     }
218 
219     repeatStore(info, &lstate->ctrl, repeatState, offset, is_alive);
220 }
221 
222 static really_inline
lbrInAccept(const struct lbr_common * l,const struct lbr_state * lstate,const char * state,u64a offset,ReportID report)223 char lbrInAccept(const struct lbr_common *l, const struct lbr_state *lstate,
224                  const char *state, u64a offset, ReportID report) {
225     assert(l && lstate && state);
226     DEBUG_PRINTF("offset=%llu, report=%u\n", offset, report);
227 
228     if (report != l->report) {
229         DEBUG_PRINTF("report=%u is not LBR report %u\n", report, l->report);
230         return 0;
231     }
232 
233     const struct RepeatInfo *info = getRepeatInfo(l);
234     assert(!repeatIsDead(info, lstate)); // Guaranteed by caller.
235 
236     const char *repeatState = state + info->packedCtrlSize;
237     return repeatHasMatch(info, &lstate->ctrl, repeatState, offset) ==
238            REPEAT_MATCH;
239 }
240 
241 static really_inline
lbrFindMatch(const struct lbr_common * l,const u64a begin,const u64a end,const struct lbr_state * lstate,const char * state,size_t * mloc)242 char lbrFindMatch(const struct lbr_common *l, const u64a begin, const u64a end,
243                   const struct lbr_state *lstate, const char *state,
244                   size_t *mloc) {
245     DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end);
246     assert(begin <= end);
247 
248     if (begin == end) {
249         return 0;
250     }
251 
252     const struct RepeatInfo *info = getRepeatInfo(l);
253     const char *repeatState = state + info->packedCtrlSize;
254     u64a i = repeatNextMatch(info, &lstate->ctrl, repeatState, begin);
255     if (i == 0) {
256         DEBUG_PRINTF("no more matches\n");
257         return 0;
258     }
259     if (i > end) {
260         DEBUG_PRINTF("next match at %llu is beyond the horizon\n", i);
261         return 0;
262     }
263 
264     DEBUG_PRINTF("stop at match at %llu\n", i);
265     assert(mloc);
266     *mloc = i - begin;
267     return 1;
268 }
269 
270 static really_inline
lbrMatchLoop(const struct lbr_common * l,const u64a begin,const u64a end,const struct lbr_state * lstate,const char * state,NfaCallback cb,void * ctx)271 char lbrMatchLoop(const struct lbr_common *l, const u64a begin, const u64a end,
272                   const struct lbr_state *lstate, const char *state,
273                   NfaCallback cb, void *ctx) {
274     DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end);
275     assert(begin <= end);
276 
277     if (begin == end) {
278         return MO_CONTINUE_MATCHING;
279     }
280 
281     const struct RepeatInfo *info = getRepeatInfo(l);
282     const char *repeatState = state + info->packedCtrlSize;
283 
284     u64a i = begin;
285     for (;;) {
286         i = repeatNextMatch(info, &lstate->ctrl, repeatState, i);
287         if (i == 0) {
288             DEBUG_PRINTF("no more matches\n");
289             return MO_CONTINUE_MATCHING;
290         }
291         if (i > end) {
292             DEBUG_PRINTF("next match at %llu is beyond the horizon\n", i);
293             return MO_CONTINUE_MATCHING;
294         }
295 
296         DEBUG_PRINTF("firing match at %llu\n", i);
297         if (cb(0, i, l->report, ctx) == MO_HALT_MATCHING) {
298             return MO_HALT_MATCHING;
299         }
300     }
301 
302     assert(0);
303     return MO_CONTINUE_MATCHING;
304 }
305 
306 static really_inline
lbrRevScanDot(UNUSED const struct NFA * nfa,UNUSED const u8 * buf,UNUSED size_t begin,UNUSED size_t end,UNUSED size_t * loc)307 char lbrRevScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf,
308                    UNUSED size_t begin, UNUSED size_t end,
309                    UNUSED size_t *loc) {
310     assert(begin <= end);
311     assert(nfa->type == LBR_NFA_DOT);
312     // Nothing can kill a dot!
313     return 0;
314 }
315 
316 static really_inline
lbrRevScanVerm(const struct NFA * nfa,const u8 * buf,size_t begin,size_t end,size_t * loc)317 char lbrRevScanVerm(const struct NFA *nfa, const u8 *buf,
318                     size_t begin, size_t end, size_t *loc) {
319     assert(begin <= end);
320     assert(nfa->type == LBR_NFA_VERM);
321     const struct lbr_verm *l = getImplNfa(nfa);
322 
323     if (begin == end) {
324         return 0;
325     }
326 
327     const u8 *ptr = rvermicelliExec(l->c, 0, buf + begin, buf + end);
328     if (ptr == buf + begin - 1) {
329         DEBUG_PRINTF("no escape found\n");
330         return 0;
331     }
332 
333     assert(loc);
334     *loc = (size_t)(ptr - buf);
335     DEBUG_PRINTF("escape found at offset %zu\n", *loc);
336     assert((char)*ptr == l->c);
337     return 1;
338 }
339 
340 static really_inline
lbrRevScanNVerm(const struct NFA * nfa,const u8 * buf,size_t begin,size_t end,size_t * loc)341 char lbrRevScanNVerm(const struct NFA *nfa, const u8 *buf,
342                      size_t begin, size_t end, size_t *loc) {
343     assert(begin <= end);
344     assert(nfa->type == LBR_NFA_NVERM);
345     const struct lbr_verm *l = getImplNfa(nfa);
346 
347     if (begin == end) {
348         return 0;
349     }
350 
351     const u8 *ptr = rnvermicelliExec(l->c, 0, buf + begin, buf + end);
352     if (ptr == buf + begin - 1) {
353         DEBUG_PRINTF("no escape found\n");
354         return 0;
355     }
356 
357     assert(loc);
358     *loc = (size_t)(ptr - buf);
359     DEBUG_PRINTF("escape found at offset %zu\n", *loc);
360     assert((char)*ptr != l->c);
361     return 1;
362 }
363 
364 static really_inline
lbrRevScanShuf(const struct NFA * nfa,const u8 * buf,size_t begin,size_t end,size_t * loc)365 char lbrRevScanShuf(const struct NFA *nfa, const u8 *buf,
366                     size_t begin, size_t end,
367                     size_t *loc) {
368     assert(begin <= end);
369     assert(nfa->type == LBR_NFA_SHUF);
370     const struct lbr_shuf *l = getImplNfa(nfa);
371 
372     if (begin == end) {
373         return 0;
374     }
375 
376     const u8 *ptr = rshuftiExec(l->mask_lo, l->mask_hi, buf + begin, buf + end);
377     if (ptr == buf + begin - 1) {
378         DEBUG_PRINTF("no escape found\n");
379         return 0;
380     }
381 
382     assert(loc);
383     *loc = (size_t)(ptr - buf);
384     DEBUG_PRINTF("escape found at offset %zu\n", *loc);
385     return 1;
386 }
387 
388 static really_inline
lbrRevScanTruf(const struct NFA * nfa,const u8 * buf,size_t begin,size_t end,size_t * loc)389 char lbrRevScanTruf(const struct NFA *nfa, const u8 *buf,
390                     size_t begin, size_t end,
391                     size_t *loc) {
392     assert(begin <= end);
393     assert(nfa->type == LBR_NFA_TRUF);
394     const struct lbr_truf *l = getImplNfa(nfa);
395 
396     if (begin == end) {
397         return 0;
398     }
399 
400     const u8 *ptr = rtruffleExec(l->mask1, l->mask2, buf + begin, buf + end);
401     if (ptr == buf + begin - 1) {
402         DEBUG_PRINTF("no escape found\n");
403         return 0;
404     }
405 
406     assert(loc);
407     *loc = (size_t)(ptr - buf);
408     DEBUG_PRINTF("escape found at offset %zu\n", *loc);
409     return 1;
410 }
411 
412 static really_inline
lbrFwdScanDot(UNUSED const struct NFA * nfa,UNUSED const u8 * buf,UNUSED size_t begin,UNUSED size_t end,UNUSED size_t * loc)413 char lbrFwdScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf,
414                    UNUSED size_t begin, UNUSED size_t end,
415                    UNUSED size_t *loc) {
416     assert(begin <= end);
417     assert(nfa->type == LBR_NFA_DOT);
418     // Nothing can kill a dot!
419     return 0;
420 }
421 
422 static really_inline
lbrFwdScanVerm(const struct NFA * nfa,const u8 * buf,size_t begin,size_t end,size_t * loc)423 char lbrFwdScanVerm(const struct NFA *nfa, const u8 *buf,
424                     size_t begin, size_t end, size_t *loc) {
425     assert(begin <= end);
426     assert(nfa->type == LBR_NFA_VERM);
427     const struct lbr_verm *l = getImplNfa(nfa);
428 
429     if (begin == end) {
430         return 0;
431     }
432 
433     const u8 *ptr = vermicelliExec(l->c, 0, buf + begin, buf + end);
434     if (ptr == buf + end) {
435         DEBUG_PRINTF("no escape found\n");
436         return 0;
437     }
438 
439     assert(loc);
440     *loc = (size_t)(ptr - buf);
441     DEBUG_PRINTF("escape found at offset %zu\n", *loc);
442     assert((char)*ptr == l->c);
443     return 1;
444 }
445 
446 static really_inline
lbrFwdScanNVerm(const struct NFA * nfa,const u8 * buf,size_t begin,size_t end,size_t * loc)447 char lbrFwdScanNVerm(const struct NFA *nfa, const u8 *buf,
448                      size_t begin, size_t end, size_t *loc) {
449     assert(begin <= end);
450     assert(nfa->type == LBR_NFA_NVERM);
451     const struct lbr_verm *l = getImplNfa(nfa);
452 
453     if (begin == end) {
454         return 0;
455     }
456 
457     const u8 *ptr = nvermicelliExec(l->c, 0, buf + begin, buf + end);
458     if (ptr == buf + end) {
459         DEBUG_PRINTF("no escape found\n");
460         return 0;
461     }
462 
463     assert(loc);
464     *loc = (size_t)(ptr - buf);
465     DEBUG_PRINTF("escape found at offset %zu\n", *loc);
466     assert((char)*ptr != l->c);
467     return 1;
468 }
469 
470 static really_inline
lbrFwdScanShuf(const struct NFA * nfa,const u8 * buf,size_t begin,size_t end,size_t * loc)471 char lbrFwdScanShuf(const struct NFA *nfa, const u8 *buf,
472                     size_t begin, size_t end,
473                     size_t *loc) {
474     assert(begin <= end);
475     assert(nfa->type == LBR_NFA_SHUF);
476     const struct lbr_shuf *l = getImplNfa(nfa);
477 
478     if (begin == end) {
479         return 0;
480     }
481 
482     const u8 *ptr = shuftiExec(l->mask_lo, l->mask_hi, buf + begin, buf + end);
483     if (ptr == buf + end) {
484         DEBUG_PRINTF("no escape found\n");
485         return 0;
486     }
487 
488     assert(loc);
489     *loc = (size_t)(ptr - buf);
490     DEBUG_PRINTF("escape found at offset %zu\n", *loc);
491     return 1;
492 }
493 
494 static really_inline
lbrFwdScanTruf(const struct NFA * nfa,const u8 * buf,size_t begin,size_t end,size_t * loc)495 char lbrFwdScanTruf(const struct NFA *nfa, const u8 *buf,
496                     size_t begin, size_t end,
497                     size_t *loc) {
498     assert(begin <= end);
499     assert(nfa->type == LBR_NFA_TRUF);
500     const struct lbr_truf *l = getImplNfa(nfa);
501 
502     if (begin == end) {
503         return 0;
504     }
505 
506     const u8 *ptr = truffleExec(l->mask1, l->mask2, buf + begin, buf + end);
507     if (ptr == buf + end) {
508         DEBUG_PRINTF("no escape found\n");
509         return 0;
510     }
511 
512     assert(loc);
513     *loc = (size_t)(ptr - buf);
514     DEBUG_PRINTF("escape found at offset %zu\n", *loc);
515     return 1;
516 }
517 
518 #define ENGINE_ROOT_NAME Dot
519 #include "lbr_common_impl.h"
520 
521 #define ENGINE_ROOT_NAME Verm
522 #include "lbr_common_impl.h"
523 
524 #define ENGINE_ROOT_NAME NVerm
525 #include "lbr_common_impl.h"
526 
527 #define ENGINE_ROOT_NAME Shuf
528 #include "lbr_common_impl.h"
529 
530 #define ENGINE_ROOT_NAME Truf
531 #include "lbr_common_impl.h"
532