1 /*
2  * Copyright (c) 2015-2017, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "config.h"
30 
31 #include "limex.h"
32 
33 #include "accel.h"
34 #include "accel_dump.h"
35 #include "limex_internal.h"
36 #include "nfa_dump_internal.h"
37 #include "ue2common.h"
38 #include "util/charreach.h"
39 #include "util/dump_charclass.h"
40 #include "util/dump_mask.h"
41 #include "util/dump_util.h"
42 
43 #include <algorithm>
44 #include <cstdio>
45 #include <cctype>
46 #include <sstream>
47 #include <vector>
48 
49 #ifndef DUMP_SUPPORT
50 #error No dump support!
51 #endif
52 
53 using namespace std;
54 
55 namespace ue2 {
56 
57 template<typename T> struct limex_traits {};
58 template<> struct limex_traits<LimExNFA512> {
59     static const u32 size = 512;
60     typedef NFAException512 exception_type;
61 };
62 template<> struct limex_traits<LimExNFA384> {
63     static const u32 size = 384;
64     typedef NFAException384 exception_type;
65 };
66 template<> struct limex_traits<LimExNFA256> {
67     static const u32 size = 256;
68     typedef NFAException256 exception_type;
69 };
70 template<> struct limex_traits<LimExNFA128> {
71     static const u32 size = 128;
72     typedef NFAException128 exception_type;
73 };
74 template<> struct limex_traits<LimExNFA64> {
75     static const u32 size = 64;
76     typedef NFAException64 exception_type;
77 };
78 template<> struct limex_traits<LimExNFA32> {
79     static const u32 size = 32;
80     typedef NFAException32 exception_type;
81 };
82 
83 static
dumpMask(FILE * f,const char * name,const u8 * mask,u32 mask_bits)84 void dumpMask(FILE *f, const char *name, const u8 *mask, u32 mask_bits) {
85     fprintf(f, "MSK %-20s %s\n", name, dumpMask(mask, mask_bits).c_str());
86 }
87 
88 template<typename mask_t>
89 static
rank_in_mask(const mask_t & mask,u32 bit)90 u32 rank_in_mask(const mask_t &mask, u32 bit) {
91     assert(bit < 8 * sizeof(mask));
92 
93     u32 chunks[sizeof(mask)/sizeof(u32)];
94     memcpy(chunks, &mask, sizeof(mask));
95     u32 base_rank = 0;
96     for (u32 i = 0; i < bit / 32; i++) {
97         base_rank += popcount32(chunks[i]);
98     }
99     u32 chunk = chunks[bit / 32];
100     u32 local_bit = bit % 32;
101     assert(chunk & (1U << local_bit));
102     return base_rank + popcount32(chunk & ((1U << local_bit) - 1));
103 }
104 
105 template <typename limex_type>
106 static
dumpRepeats(const limex_type * limex,u32 model_size,FILE * f)107 void dumpRepeats(const limex_type *limex, u32 model_size, FILE *f) {
108     fprintf(f, "\n");
109     fprintf(f, "%u bounded repeats.\n", limex->repeatCount);
110 
111     const char *base = (const char *)limex;
112     const u32 *repeatOffset = (const u32 *)(base + limex->repeatOffset);
113 
114     for (u32 i = 0; i < limex->repeatCount; i++) {
115         const NFARepeatInfo *info =
116             (const NFARepeatInfo *)(base + repeatOffset[i]);
117         const RepeatInfo *repeat =
118             (const RepeatInfo *)((const char *)info + sizeof(*info));
119         fprintf(f, "  repeat %u: %s {%u,%u} packedCtrlSize=%u, "
120                    "stateSize=%u\n",
121                 i, repeatTypeName(repeat->type), repeat->repeatMin,
122                 repeat->repeatMax, repeat->packedCtrlSize, repeat->stateSize);
123         fprintf(f, "    nfa state: stream offset %u\n", info->stateOffset);
124         fprintf(f, "    ");
125 
126         const u8 *tug_mask = (const u8 *)info + info->tugMaskOffset;
127         dumpMask(f, "tugs", tug_mask, model_size);
128     }
129 
130     fprintf(f, "\n");
131 }
132 
133 static
dumpLimexReachMasks(u32 model_size,const u8 * reach,u32 reachCount,FILE * f)134 void dumpLimexReachMasks(u32 model_size, const u8 *reach, u32 reachCount,
135                          FILE *f) {
136     for (u32 i = 0; i < reachCount; i++) {
137         char tmp_common[100];
138         const u8 *row = reach + (i * (model_size/8));
139         sprintf(tmp_common, "reach mask %u ", i);
140         dumpMask(f, tmp_common, row, model_size);
141     }
142 }
143 
144 static
dumpLimexReachMap(const u8 * reachMap,FILE * f)145 void dumpLimexReachMap(const u8 *reachMap, FILE *f) {
146     for (u32 i = 0; i < N_CHARS; i++) {
147         fprintf(f, "reach 0x%02x ", i);
148         if (!isprint(i)) {
149             fprintf(f, "   ");
150         } else {
151             fprintf(f, "'%c'", (char)i);
152         }
153         fprintf(f, " -> mask %hhu\n", reachMap[i]);
154     }
155 }
156 
157 template<typename limex_type>
158 static
limex_to_nfa(const limex_type * limex)159 const NFA *limex_to_nfa(const limex_type *limex) {
160     return (const NFA *)((const char *)limex - sizeof(NFA));
161 }
162 
163 template<typename limex_type>
164 static
dumpAccel(const limex_type * limex,FILE * f)165 void dumpAccel(const limex_type *limex, FILE *f) {
166     fprintf(f, "\n");
167     fprintf(f, "%u acceleration schemes.\n", limex->accelCount);
168 
169     if (!limex->accelCount) {
170         return;
171     }
172 
173     u32 tableOffset = limex->accelTableOffset;
174     u32 auxOffset = limex->accelAuxOffset;
175     const u8 *accelTable = (const u8 *)((const char *)limex + tableOffset);
176     const AccelAux *aux = (const AccelAux *)((const char *)limex + auxOffset);
177 
178     for (u32 i = 0; i < limex->accelCount; i++) {
179         fprintf(f, "  accel %u (aux entry %u): ", i, accelTable[i]);
180         dumpAccelInfo(f, aux[accelTable[i]]);
181     }
182 }
183 
184 static
dumpAcceptList(const char * limex_base,const struct NFAAccept * accepts,u32 acceptCount,FILE * f)185 void dumpAcceptList(const char *limex_base, const struct NFAAccept *accepts,
186                     u32 acceptCount, FILE *f) {
187     for (u32 i = 0; i < acceptCount; i++) {
188         const NFAAccept &a = accepts[i];
189         if (a.single_report) {
190             fprintf(f, "  idx %u fires single report %u\n", i, a.reports);
191             continue;
192         }
193         fprintf(f, "  idx %u fires report list %u:", i, a.reports);
194         const ReportID *report = (const ReportID *)(limex_base + a.reports);
195         for (; *report != MO_INVALID_IDX; report++) {
196             fprintf(f, " %u", *report);
197         }
198         fprintf(f, "\n");
199     }
200 }
201 
202 template<typename limex_type>
203 static
dumpAccepts(const limex_type * limex,FILE * f)204 void dumpAccepts(const limex_type *limex, FILE *f) {
205     const char *limex_base = (const char *)limex;
206 
207     const u32 acceptCount = limex->acceptCount;
208     const u32 acceptEodCount = limex->acceptEodCount;
209 
210     fprintf(f, "\n%u accepts.\n", acceptCount);
211     const auto *accepts =
212         (const struct NFAAccept *)(limex_base + limex->acceptOffset);
213     dumpAcceptList(limex_base, accepts, acceptCount, f);
214     fprintf(f, "\n%u accepts at EOD.\n", acceptEodCount);
215     const auto *accepts_eod =
216         (const struct NFAAccept *)(limex_base + limex->acceptEodOffset);
217     dumpAcceptList(limex_base, accepts_eod, acceptEodCount, f);
218     fprintf(f, "\n");
219 }
220 
221 template<typename limex_type>
222 static
dumpSquash(const limex_type * limex,FILE * f)223 void dumpSquash(const limex_type *limex, FILE *f) {
224     u32 size = limex_traits<limex_type>::size;
225 
226     // Dump squash masks, if there are any.
227     const u8 *squashMask = (const u8 *)limex + limex->squashOffset;
228     for (u32 i = 0; i < limex->squashCount; i++) {
229         std::ostringstream name;
230         name << "squash_" << i;
231         dumpMask(f, name.str().c_str(), squashMask, size);
232         squashMask += size / 8;
233     }
234 }
235 
236 template<typename limex_type>
237 static
238 const typename limex_traits<limex_type>::exception_type *
getExceptionTable(const limex_type * limex)239 getExceptionTable(const limex_type *limex) {
240     return (const typename limex_traits<limex_type>::exception_type *)
241         ((const char *)limex + limex->exceptionOffset);
242 }
243 
244 template<typename limex_type>
245 static
dumpLimexExceptions(const limex_type * limex,FILE * f)246 void dumpLimexExceptions(const limex_type *limex, FILE *f) {
247     const typename limex_traits<limex_type>::exception_type *e =
248                 getExceptionTable(limex);
249     const u32 size = limex_traits<limex_type>::size;
250 
251     const char *limex_base = (const char *)limex;
252 
253     fprintf(f, "\n");
254     for (u32 i = 0; i < limex->exceptionCount; i++) {
255         fprintf(f, "exception %u: hasSquash=%u, reports offset=%u\n",
256                 i, e[i].hasSquash, e[i].reports);
257         switch (e[i].trigger) {
258         case LIMEX_TRIGGER_TUG: fprintf(f, "  trigger: TUG\n"); break;
259         case LIMEX_TRIGGER_POS: fprintf(f, "  trigger: POS\n"); break;
260         default: break;
261         }
262         dumpMask(f, "succ", (const u8 *)&e[i].successors, size);
263         dumpMask(f, "squash", (const u8 *)&e[i].squash, size);
264         fprintf(f, "reports: ");
265         if (e[i].reports == MO_INVALID_IDX) {
266             fprintf(f, " <none>\n");
267         } else {
268             const ReportID *r = (const ReportID *)(limex_base + e[i].reports);
269             while (*r != MO_INVALID_IDX) {
270                 fprintf(f, " %u", *r++);
271             }
272             fprintf(f, "\n");
273         }
274 
275     }
276 }
277 
278 template<typename limex_type>
279 static
dumpLimexShifts(const limex_type * limex,FILE * f)280 void dumpLimexShifts(const limex_type *limex, FILE *f) {
281     u32 size = limex_traits<limex_type>::size;
282     fprintf(f, "Shift Masks:\n");
283     for(u32 i = 0; i < limex->shiftCount; i++) {
284         fprintf(f, "\t Shift %u(%hhu)\t\tMask: %s\n", i, limex->shiftAmount[i],
285                 dumpMask((const u8 *)&limex->shift[i], size).c_str());
286     }
287 }
288 template<typename limex_type>
289 static
dumpLimexText(const limex_type * limex,FILE * f)290 void dumpLimexText(const limex_type *limex, FILE *f) {
291     u32 size = limex_traits<limex_type>::size;
292 
293     fprintf(f, "%u-bit LimEx NFA (%u shifts, %u exceptions)\n", size,
294             limex->shiftCount, limex->exceptionCount);
295     fprintf(f, "flags: ");
296     if (limex->flags & LIMEX_FLAG_COMPRESS_STATE) {
297         fprintf(f, "COMPRESS_STATE ");
298     }
299     if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) {
300         fprintf(f, "COMPRESS_MASKED ");
301     }
302     if (limex->flags & LIMEX_FLAG_CANNOT_DIE) {
303         fprintf(f, "CANNOT_DIE ");
304     }
305     fprintf(f, "\n\n");
306 
307     dumpMask(f, "init", (const u8 *)&limex->init, size);
308     dumpMask(f, "init_dot_star", (const u8 *)&limex->initDS, size);
309     dumpMask(f, "accept", (const u8 *)&limex->accept, size);
310     dumpMask(f, "accept_at_eod", (const u8 *)&limex->acceptAtEOD, size);
311     dumpMask(f, "accel", (const u8 *)&limex->accel, size);
312     dumpMask(f, "accel_and_friends", (const u8 *)&limex->accel_and_friends,
313              size);
314     dumpMask(f, "compress_mask", (const u8 *)&limex->compressMask, size);
315     dumpMask(f, "emask", (const u8 *)&limex->exceptionMask, size);
316     dumpMask(f, "zombie", (const u8 *)&limex->zombieMask, size);
317 
318     // Dump top masks, if there are any.
319     u32 topCount = limex->topCount;
320     const u8 *topMask = (const u8 *)limex + limex->topOffset;
321     for (u32 i = 0; i < topCount; i++) {
322         std::ostringstream name;
323         name << "top_" << i;
324         dumpMask(f, name.str().c_str(), topMask, size);
325         topMask += size / 8;
326     }
327 
328     // Dump shift masks
329     dumpLimexShifts(limex, f);
330 
331     dumpSquash(limex, f);
332 
333     dumpLimexReachMap(limex->reachMap, f);
334     dumpLimexReachMasks(size, (const u8 *)limex + sizeof(*limex) /* reach*/,
335                         limex->reachSize, f);
336 
337     dumpAccepts(limex, f);
338 
339     dumpLimexExceptions<limex_type>(limex, f);
340 
341     dumpAccel<limex_type>(limex, f);
342 
343     dumpRepeats(limex, size, f);
344     dumpTextReverse(limex_to_nfa(limex), f);
345 }
346 
347 static
testbit(const u8 * mask,UNUSED u32 mask_bits,u32 bit)348 bool testbit(const u8 *mask, UNUSED u32 mask_bits, u32 bit) {
349     u32 byte = bit / 8;
350     return mask[byte] & (1 << (bit % 8));
351 }
352 
353 static
setupReach(const u8 * reachMap,const u8 * reachBase,u32 size,u32 state_count,vector<CharReach> * perStateReach)354 void setupReach(const u8 *reachMap, const u8 *reachBase, u32 size,
355                 u32 state_count, vector<CharReach> *perStateReach) {
356     for (u32 i = 0; i < state_count; i++) {
357         perStateReach->push_back(CharReach());
358         for (u32 j = 0; j < N_CHARS; j++) {
359             u8 k = reachMap[j];
360             const u8 *r = reachBase + k * (size/8);
361             if (testbit(r, size, i)) {
362                 perStateReach->back().set(j);
363             }
364         }
365     }
366 }
367 
368 namespace {
369 struct nfa_labeller {
~nfa_labellerue2::__anonca230ba30111::nfa_labeller370     virtual ~nfa_labeller() {}
371     virtual void label_state(FILE *f, u32 state) const = 0;
372 };
373 
374 template<typename limex_type>
375 struct limex_labeller : public nfa_labeller {
limex_labellerue2::__anonca230ba30111::limex_labeller376     explicit limex_labeller(const limex_type *limex_in) : limex(limex_in) {}
377 
label_stateue2::__anonca230ba30111::limex_labeller378     void label_state(FILE *f, u32 state) const override {
379         const typename limex_traits<limex_type>::exception_type *exceptions
380             = getExceptionTable(limex);
381         if (!testbit((const u8 *)&limex->exceptionMask,
382                      limex_traits<limex_type>::size, state)) {
383             return;
384         }
385 
386         u32 ex_index = rank_in_mask(limex->exceptionMask, state);
387         const typename limex_traits<limex_type>::exception_type *e
388             = &exceptions[ex_index];
389 
390         switch (e->trigger) {
391         case LIMEX_TRIGGER_TUG: fprintf(f, "\\nTUG"); break;
392         case LIMEX_TRIGGER_POS: fprintf(f, "\\nPOS"); break;
393         default: break;
394         }
395     }
396 
397     const limex_type *limex;
398 };
399 
400 }
401 
402 template<typename limex_type>
403 static
dumpVertexDotInfo(const limex_type * limex,u32 state_count,FILE * f,const nfa_labeller & labeller)404 void dumpVertexDotInfo(const limex_type *limex, u32 state_count, FILE *f,
405                        const nfa_labeller &labeller) {
406     u32 size = sizeof(limex->init) * 8;
407     const u8 *reach = (const u8 *)limex + sizeof(*limex);
408     vector<CharReach> perStateReach;
409     setupReach(limex->reachMap, reach, size, state_count, &perStateReach);
410 
411     const u8 *topMask = (const u8 *)limex + limex->topOffset;
412 
413     for (u32 state = 0; state < state_count; state++) {
414         fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
415                 "label = \"%u\\n", state, state);
416         assert(perStateReach[state].any());
417         describeClass(f, perStateReach[state], 5, CC_OUT_DOT);
418         labeller.label_state(f, state);
419         // bung in another couple lines to push char class (the widest thing) up a bit
420         fprintf(f, "\\n\\n\" ];\n");
421 
422         if (testbit((const u8 *)&limex->acceptAtEOD, size, state)) {
423             fprintf(f, "%u [ shape = box ];\n", state);
424         } else if (testbit((const u8 *)&limex->accept, size, state)) {
425             fprintf(f, "%u [ shape = doublecircle ];\n", state);
426         }
427         if (testbit((const u8 *)&limex->accel, size, state)) {
428             fprintf(f, "%u [ color = red style = diagonals];\n", state);
429         }
430         if (testbit((const u8 *)&limex->init, size, state)) {
431             fprintf(f, "START -> %u [ color = grey ];\n", state);
432         }
433 
434         // vertex could be in a top mask.
435         for (u32 i = 0; i < limex->topCount; i++) {
436             const u8 *msk = topMask + i * (size / 8);
437             if (testbit(msk, size, state)) {
438                 fprintf(f, "START -> %u [ color = grey, "
439                            "label = \"TOP %u\" ];\n",
440                         state, i);
441             }
442         }
443     }
444 }
445 
446 template<typename limex_type>
447 static
dumpExDotInfo(const limex_type * limex,u32 state,FILE * f)448 void dumpExDotInfo(const limex_type *limex, u32 state, FILE *f) {
449     u32 size = limex_traits<limex_type>::size;
450     if (!testbit((const u8 *)&limex->exceptionMask, size, state)) {
451         return; /* not exceptional */
452     }
453 
454     const typename limex_traits<limex_type>::exception_type *exceptions
455         = getExceptionTable(limex);
456 
457     u32 ex_index = rank_in_mask(limex->exceptionMask, state);
458     const typename limex_traits<limex_type>::exception_type *e
459         = &exceptions[ex_index];
460 
461     u32 state_count = limex_to_nfa(limex)->nPositions;
462 
463     for (u32 j = 0; j < state_count; j++) {
464         if (testbit((const u8 *)&e->successors, size, j)) {
465             fprintf(f, "%u -> %u [color = blue];\n", state, j);
466         }
467         if (!testbit((const u8 *)&e->squash, size, j)) {
468             fprintf(f, "%u -> %u [color = grey style = dashed];\n", state, j);
469         }
470     }
471     if (e->trigger != LIMEX_TRIGGER_NONE) {
472         fprintf(f, "%u [color = forestgreen];\n", state);
473     } else {
474         fprintf(f, "%u [color = blue];\n", state);
475     }
476 }
477 
478 template<typename limex_type>
479 static
dumpLimDotInfo(const limex_type * limex,u32 state,FILE * f)480 void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) {
481     for (u32 j = 0; j < limex->shiftCount; j++) {
482         const u32 shift_amount = limex->shiftAmount[j];
483         if (testbit((const u8 *)&limex->shift[j],
484                     limex_traits<limex_type>::size, state)) {
485             fprintf(f, "%u -> %u;\n", state, state + shift_amount);
486         }
487     }
488 }
489 
490 template<typename limex_type>
491 static
dumpLimexDot(const NFA * nfa,const limex_type * limex,FILE * f)492 void dumpLimexDot(const NFA *nfa, const limex_type *limex, FILE *f) {
493     dumpDotPreamble(f);
494     u32 state_count = nfa->nPositions;
495     dumpVertexDotInfo(limex, state_count, f, limex_labeller<limex_type>(limex));
496     for (u32 i = 0; i < state_count; i++) {
497         dumpLimDotInfo(limex, i, f);
498         dumpExDotInfo(limex, i, f);
499     }
500     dumpDotTrailer(f);
501 }
502 
503 #define LIMEX_DUMP_FN(size)                                                    \
504     void nfaExecLimEx##size##_dump(const NFA *nfa, const string &base) {       \
505         auto limex = (const LimExNFA##size *)getImplNfa(nfa);                  \
506         dumpLimexText(limex, StdioFile(base + ".txt", "w"));                   \
507         dumpLimexDot(nfa, limex, StdioFile(base + ".dot", "w"));               \
508     }
509 
510 LIMEX_DUMP_FN(32)
511 LIMEX_DUMP_FN(64)
512 LIMEX_DUMP_FN(128)
513 LIMEX_DUMP_FN(256)
514 LIMEX_DUMP_FN(384)
515 LIMEX_DUMP_FN(512)
516 
517 } // namespace ue2
518