1 /*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include "config.h"
30
31 #include "limex.h"
32
33 #include "accel.h"
34 #include "accel_dump.h"
35 #include "limex_internal.h"
36 #include "nfa_dump_internal.h"
37 #include "ue2common.h"
38 #include "util/charreach.h"
39 #include "util/dump_charclass.h"
40 #include "util/dump_mask.h"
41 #include "util/dump_util.h"
42
43 #include <algorithm>
44 #include <cstdio>
45 #include <cctype>
46 #include <sstream>
47 #include <vector>
48
49 #ifndef DUMP_SUPPORT
50 #error No dump support!
51 #endif
52
53 using namespace std;
54
55 namespace ue2 {
56
57 template<typename T> struct limex_traits {};
58 template<> struct limex_traits<LimExNFA512> {
59 static const u32 size = 512;
60 typedef NFAException512 exception_type;
61 };
62 template<> struct limex_traits<LimExNFA384> {
63 static const u32 size = 384;
64 typedef NFAException384 exception_type;
65 };
66 template<> struct limex_traits<LimExNFA256> {
67 static const u32 size = 256;
68 typedef NFAException256 exception_type;
69 };
70 template<> struct limex_traits<LimExNFA128> {
71 static const u32 size = 128;
72 typedef NFAException128 exception_type;
73 };
74 template<> struct limex_traits<LimExNFA64> {
75 static const u32 size = 64;
76 typedef NFAException64 exception_type;
77 };
78 template<> struct limex_traits<LimExNFA32> {
79 static const u32 size = 32;
80 typedef NFAException32 exception_type;
81 };
82
83 static
dumpMask(FILE * f,const char * name,const u8 * mask,u32 mask_bits)84 void dumpMask(FILE *f, const char *name, const u8 *mask, u32 mask_bits) {
85 fprintf(f, "MSK %-20s %s\n", name, dumpMask(mask, mask_bits).c_str());
86 }
87
88 template<typename mask_t>
89 static
rank_in_mask(const mask_t & mask,u32 bit)90 u32 rank_in_mask(const mask_t &mask, u32 bit) {
91 assert(bit < 8 * sizeof(mask));
92
93 u32 chunks[sizeof(mask)/sizeof(u32)];
94 memcpy(chunks, &mask, sizeof(mask));
95 u32 base_rank = 0;
96 for (u32 i = 0; i < bit / 32; i++) {
97 base_rank += popcount32(chunks[i]);
98 }
99 u32 chunk = chunks[bit / 32];
100 u32 local_bit = bit % 32;
101 assert(chunk & (1U << local_bit));
102 return base_rank + popcount32(chunk & ((1U << local_bit) - 1));
103 }
104
105 template <typename limex_type>
106 static
dumpRepeats(const limex_type * limex,u32 model_size,FILE * f)107 void dumpRepeats(const limex_type *limex, u32 model_size, FILE *f) {
108 fprintf(f, "\n");
109 fprintf(f, "%u bounded repeats.\n", limex->repeatCount);
110
111 const char *base = (const char *)limex;
112 const u32 *repeatOffset = (const u32 *)(base + limex->repeatOffset);
113
114 for (u32 i = 0; i < limex->repeatCount; i++) {
115 const NFARepeatInfo *info =
116 (const NFARepeatInfo *)(base + repeatOffset[i]);
117 const RepeatInfo *repeat =
118 (const RepeatInfo *)((const char *)info + sizeof(*info));
119 fprintf(f, " repeat %u: %s {%u,%u} packedCtrlSize=%u, "
120 "stateSize=%u\n",
121 i, repeatTypeName(repeat->type), repeat->repeatMin,
122 repeat->repeatMax, repeat->packedCtrlSize, repeat->stateSize);
123 fprintf(f, " nfa state: stream offset %u\n", info->stateOffset);
124 fprintf(f, " ");
125
126 const u8 *tug_mask = (const u8 *)info + info->tugMaskOffset;
127 dumpMask(f, "tugs", tug_mask, model_size);
128 }
129
130 fprintf(f, "\n");
131 }
132
133 static
dumpLimexReachMasks(u32 model_size,const u8 * reach,u32 reachCount,FILE * f)134 void dumpLimexReachMasks(u32 model_size, const u8 *reach, u32 reachCount,
135 FILE *f) {
136 for (u32 i = 0; i < reachCount; i++) {
137 char tmp_common[100];
138 const u8 *row = reach + (i * (model_size/8));
139 sprintf(tmp_common, "reach mask %u ", i);
140 dumpMask(f, tmp_common, row, model_size);
141 }
142 }
143
144 static
dumpLimexReachMap(const u8 * reachMap,FILE * f)145 void dumpLimexReachMap(const u8 *reachMap, FILE *f) {
146 for (u32 i = 0; i < N_CHARS; i++) {
147 fprintf(f, "reach 0x%02x ", i);
148 if (!isprint(i)) {
149 fprintf(f, " ");
150 } else {
151 fprintf(f, "'%c'", (char)i);
152 }
153 fprintf(f, " -> mask %hhu\n", reachMap[i]);
154 }
155 }
156
157 template<typename limex_type>
158 static
limex_to_nfa(const limex_type * limex)159 const NFA *limex_to_nfa(const limex_type *limex) {
160 return (const NFA *)((const char *)limex - sizeof(NFA));
161 }
162
163 template<typename limex_type>
164 static
dumpAccel(const limex_type * limex,FILE * f)165 void dumpAccel(const limex_type *limex, FILE *f) {
166 fprintf(f, "\n");
167 fprintf(f, "%u acceleration schemes.\n", limex->accelCount);
168
169 if (!limex->accelCount) {
170 return;
171 }
172
173 u32 tableOffset = limex->accelTableOffset;
174 u32 auxOffset = limex->accelAuxOffset;
175 const u8 *accelTable = (const u8 *)((const char *)limex + tableOffset);
176 const AccelAux *aux = (const AccelAux *)((const char *)limex + auxOffset);
177
178 for (u32 i = 0; i < limex->accelCount; i++) {
179 fprintf(f, " accel %u (aux entry %u): ", i, accelTable[i]);
180 dumpAccelInfo(f, aux[accelTable[i]]);
181 }
182 }
183
184 static
dumpAcceptList(const char * limex_base,const struct NFAAccept * accepts,u32 acceptCount,FILE * f)185 void dumpAcceptList(const char *limex_base, const struct NFAAccept *accepts,
186 u32 acceptCount, FILE *f) {
187 for (u32 i = 0; i < acceptCount; i++) {
188 const NFAAccept &a = accepts[i];
189 if (a.single_report) {
190 fprintf(f, " idx %u fires single report %u\n", i, a.reports);
191 continue;
192 }
193 fprintf(f, " idx %u fires report list %u:", i, a.reports);
194 const ReportID *report = (const ReportID *)(limex_base + a.reports);
195 for (; *report != MO_INVALID_IDX; report++) {
196 fprintf(f, " %u", *report);
197 }
198 fprintf(f, "\n");
199 }
200 }
201
202 template<typename limex_type>
203 static
dumpAccepts(const limex_type * limex,FILE * f)204 void dumpAccepts(const limex_type *limex, FILE *f) {
205 const char *limex_base = (const char *)limex;
206
207 const u32 acceptCount = limex->acceptCount;
208 const u32 acceptEodCount = limex->acceptEodCount;
209
210 fprintf(f, "\n%u accepts.\n", acceptCount);
211 const auto *accepts =
212 (const struct NFAAccept *)(limex_base + limex->acceptOffset);
213 dumpAcceptList(limex_base, accepts, acceptCount, f);
214 fprintf(f, "\n%u accepts at EOD.\n", acceptEodCount);
215 const auto *accepts_eod =
216 (const struct NFAAccept *)(limex_base + limex->acceptEodOffset);
217 dumpAcceptList(limex_base, accepts_eod, acceptEodCount, f);
218 fprintf(f, "\n");
219 }
220
221 template<typename limex_type>
222 static
dumpSquash(const limex_type * limex,FILE * f)223 void dumpSquash(const limex_type *limex, FILE *f) {
224 u32 size = limex_traits<limex_type>::size;
225
226 // Dump squash masks, if there are any.
227 const u8 *squashMask = (const u8 *)limex + limex->squashOffset;
228 for (u32 i = 0; i < limex->squashCount; i++) {
229 std::ostringstream name;
230 name << "squash_" << i;
231 dumpMask(f, name.str().c_str(), squashMask, size);
232 squashMask += size / 8;
233 }
234 }
235
236 template<typename limex_type>
237 static
238 const typename limex_traits<limex_type>::exception_type *
getExceptionTable(const limex_type * limex)239 getExceptionTable(const limex_type *limex) {
240 return (const typename limex_traits<limex_type>::exception_type *)
241 ((const char *)limex + limex->exceptionOffset);
242 }
243
244 template<typename limex_type>
245 static
dumpLimexExceptions(const limex_type * limex,FILE * f)246 void dumpLimexExceptions(const limex_type *limex, FILE *f) {
247 const typename limex_traits<limex_type>::exception_type *e =
248 getExceptionTable(limex);
249 const u32 size = limex_traits<limex_type>::size;
250
251 const char *limex_base = (const char *)limex;
252
253 fprintf(f, "\n");
254 for (u32 i = 0; i < limex->exceptionCount; i++) {
255 fprintf(f, "exception %u: hasSquash=%u, reports offset=%u\n",
256 i, e[i].hasSquash, e[i].reports);
257 switch (e[i].trigger) {
258 case LIMEX_TRIGGER_TUG: fprintf(f, " trigger: TUG\n"); break;
259 case LIMEX_TRIGGER_POS: fprintf(f, " trigger: POS\n"); break;
260 default: break;
261 }
262 dumpMask(f, "succ", (const u8 *)&e[i].successors, size);
263 dumpMask(f, "squash", (const u8 *)&e[i].squash, size);
264 fprintf(f, "reports: ");
265 if (e[i].reports == MO_INVALID_IDX) {
266 fprintf(f, " <none>\n");
267 } else {
268 const ReportID *r = (const ReportID *)(limex_base + e[i].reports);
269 while (*r != MO_INVALID_IDX) {
270 fprintf(f, " %u", *r++);
271 }
272 fprintf(f, "\n");
273 }
274
275 }
276 }
277
278 template<typename limex_type>
279 static
dumpLimexShifts(const limex_type * limex,FILE * f)280 void dumpLimexShifts(const limex_type *limex, FILE *f) {
281 u32 size = limex_traits<limex_type>::size;
282 fprintf(f, "Shift Masks:\n");
283 for(u32 i = 0; i < limex->shiftCount; i++) {
284 fprintf(f, "\t Shift %u(%hhu)\t\tMask: %s\n", i, limex->shiftAmount[i],
285 dumpMask((const u8 *)&limex->shift[i], size).c_str());
286 }
287 }
288 template<typename limex_type>
289 static
dumpLimexText(const limex_type * limex,FILE * f)290 void dumpLimexText(const limex_type *limex, FILE *f) {
291 u32 size = limex_traits<limex_type>::size;
292
293 fprintf(f, "%u-bit LimEx NFA (%u shifts, %u exceptions)\n", size,
294 limex->shiftCount, limex->exceptionCount);
295 fprintf(f, "flags: ");
296 if (limex->flags & LIMEX_FLAG_COMPRESS_STATE) {
297 fprintf(f, "COMPRESS_STATE ");
298 }
299 if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) {
300 fprintf(f, "COMPRESS_MASKED ");
301 }
302 if (limex->flags & LIMEX_FLAG_CANNOT_DIE) {
303 fprintf(f, "CANNOT_DIE ");
304 }
305 fprintf(f, "\n\n");
306
307 dumpMask(f, "init", (const u8 *)&limex->init, size);
308 dumpMask(f, "init_dot_star", (const u8 *)&limex->initDS, size);
309 dumpMask(f, "accept", (const u8 *)&limex->accept, size);
310 dumpMask(f, "accept_at_eod", (const u8 *)&limex->acceptAtEOD, size);
311 dumpMask(f, "accel", (const u8 *)&limex->accel, size);
312 dumpMask(f, "accel_and_friends", (const u8 *)&limex->accel_and_friends,
313 size);
314 dumpMask(f, "compress_mask", (const u8 *)&limex->compressMask, size);
315 dumpMask(f, "emask", (const u8 *)&limex->exceptionMask, size);
316 dumpMask(f, "zombie", (const u8 *)&limex->zombieMask, size);
317
318 // Dump top masks, if there are any.
319 u32 topCount = limex->topCount;
320 const u8 *topMask = (const u8 *)limex + limex->topOffset;
321 for (u32 i = 0; i < topCount; i++) {
322 std::ostringstream name;
323 name << "top_" << i;
324 dumpMask(f, name.str().c_str(), topMask, size);
325 topMask += size / 8;
326 }
327
328 // Dump shift masks
329 dumpLimexShifts(limex, f);
330
331 dumpSquash(limex, f);
332
333 dumpLimexReachMap(limex->reachMap, f);
334 dumpLimexReachMasks(size, (const u8 *)limex + sizeof(*limex) /* reach*/,
335 limex->reachSize, f);
336
337 dumpAccepts(limex, f);
338
339 dumpLimexExceptions<limex_type>(limex, f);
340
341 dumpAccel<limex_type>(limex, f);
342
343 dumpRepeats(limex, size, f);
344 dumpTextReverse(limex_to_nfa(limex), f);
345 }
346
347 static
testbit(const u8 * mask,UNUSED u32 mask_bits,u32 bit)348 bool testbit(const u8 *mask, UNUSED u32 mask_bits, u32 bit) {
349 u32 byte = bit / 8;
350 return mask[byte] & (1 << (bit % 8));
351 }
352
353 static
setupReach(const u8 * reachMap,const u8 * reachBase,u32 size,u32 state_count,vector<CharReach> * perStateReach)354 void setupReach(const u8 *reachMap, const u8 *reachBase, u32 size,
355 u32 state_count, vector<CharReach> *perStateReach) {
356 for (u32 i = 0; i < state_count; i++) {
357 perStateReach->push_back(CharReach());
358 for (u32 j = 0; j < N_CHARS; j++) {
359 u8 k = reachMap[j];
360 const u8 *r = reachBase + k * (size/8);
361 if (testbit(r, size, i)) {
362 perStateReach->back().set(j);
363 }
364 }
365 }
366 }
367
368 namespace {
369 struct nfa_labeller {
~nfa_labellerue2::__anonca230ba30111::nfa_labeller370 virtual ~nfa_labeller() {}
371 virtual void label_state(FILE *f, u32 state) const = 0;
372 };
373
374 template<typename limex_type>
375 struct limex_labeller : public nfa_labeller {
limex_labellerue2::__anonca230ba30111::limex_labeller376 explicit limex_labeller(const limex_type *limex_in) : limex(limex_in) {}
377
label_stateue2::__anonca230ba30111::limex_labeller378 void label_state(FILE *f, u32 state) const override {
379 const typename limex_traits<limex_type>::exception_type *exceptions
380 = getExceptionTable(limex);
381 if (!testbit((const u8 *)&limex->exceptionMask,
382 limex_traits<limex_type>::size, state)) {
383 return;
384 }
385
386 u32 ex_index = rank_in_mask(limex->exceptionMask, state);
387 const typename limex_traits<limex_type>::exception_type *e
388 = &exceptions[ex_index];
389
390 switch (e->trigger) {
391 case LIMEX_TRIGGER_TUG: fprintf(f, "\\nTUG"); break;
392 case LIMEX_TRIGGER_POS: fprintf(f, "\\nPOS"); break;
393 default: break;
394 }
395 }
396
397 const limex_type *limex;
398 };
399
400 }
401
402 template<typename limex_type>
403 static
dumpVertexDotInfo(const limex_type * limex,u32 state_count,FILE * f,const nfa_labeller & labeller)404 void dumpVertexDotInfo(const limex_type *limex, u32 state_count, FILE *f,
405 const nfa_labeller &labeller) {
406 u32 size = sizeof(limex->init) * 8;
407 const u8 *reach = (const u8 *)limex + sizeof(*limex);
408 vector<CharReach> perStateReach;
409 setupReach(limex->reachMap, reach, size, state_count, &perStateReach);
410
411 const u8 *topMask = (const u8 *)limex + limex->topOffset;
412
413 for (u32 state = 0; state < state_count; state++) {
414 fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
415 "label = \"%u\\n", state, state);
416 assert(perStateReach[state].any());
417 describeClass(f, perStateReach[state], 5, CC_OUT_DOT);
418 labeller.label_state(f, state);
419 // bung in another couple lines to push char class (the widest thing) up a bit
420 fprintf(f, "\\n\\n\" ];\n");
421
422 if (testbit((const u8 *)&limex->acceptAtEOD, size, state)) {
423 fprintf(f, "%u [ shape = box ];\n", state);
424 } else if (testbit((const u8 *)&limex->accept, size, state)) {
425 fprintf(f, "%u [ shape = doublecircle ];\n", state);
426 }
427 if (testbit((const u8 *)&limex->accel, size, state)) {
428 fprintf(f, "%u [ color = red style = diagonals];\n", state);
429 }
430 if (testbit((const u8 *)&limex->init, size, state)) {
431 fprintf(f, "START -> %u [ color = grey ];\n", state);
432 }
433
434 // vertex could be in a top mask.
435 for (u32 i = 0; i < limex->topCount; i++) {
436 const u8 *msk = topMask + i * (size / 8);
437 if (testbit(msk, size, state)) {
438 fprintf(f, "START -> %u [ color = grey, "
439 "label = \"TOP %u\" ];\n",
440 state, i);
441 }
442 }
443 }
444 }
445
446 template<typename limex_type>
447 static
dumpExDotInfo(const limex_type * limex,u32 state,FILE * f)448 void dumpExDotInfo(const limex_type *limex, u32 state, FILE *f) {
449 u32 size = limex_traits<limex_type>::size;
450 if (!testbit((const u8 *)&limex->exceptionMask, size, state)) {
451 return; /* not exceptional */
452 }
453
454 const typename limex_traits<limex_type>::exception_type *exceptions
455 = getExceptionTable(limex);
456
457 u32 ex_index = rank_in_mask(limex->exceptionMask, state);
458 const typename limex_traits<limex_type>::exception_type *e
459 = &exceptions[ex_index];
460
461 u32 state_count = limex_to_nfa(limex)->nPositions;
462
463 for (u32 j = 0; j < state_count; j++) {
464 if (testbit((const u8 *)&e->successors, size, j)) {
465 fprintf(f, "%u -> %u [color = blue];\n", state, j);
466 }
467 if (!testbit((const u8 *)&e->squash, size, j)) {
468 fprintf(f, "%u -> %u [color = grey style = dashed];\n", state, j);
469 }
470 }
471 if (e->trigger != LIMEX_TRIGGER_NONE) {
472 fprintf(f, "%u [color = forestgreen];\n", state);
473 } else {
474 fprintf(f, "%u [color = blue];\n", state);
475 }
476 }
477
478 template<typename limex_type>
479 static
dumpLimDotInfo(const limex_type * limex,u32 state,FILE * f)480 void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) {
481 for (u32 j = 0; j < limex->shiftCount; j++) {
482 const u32 shift_amount = limex->shiftAmount[j];
483 if (testbit((const u8 *)&limex->shift[j],
484 limex_traits<limex_type>::size, state)) {
485 fprintf(f, "%u -> %u;\n", state, state + shift_amount);
486 }
487 }
488 }
489
490 template<typename limex_type>
491 static
dumpLimexDot(const NFA * nfa,const limex_type * limex,FILE * f)492 void dumpLimexDot(const NFA *nfa, const limex_type *limex, FILE *f) {
493 dumpDotPreamble(f);
494 u32 state_count = nfa->nPositions;
495 dumpVertexDotInfo(limex, state_count, f, limex_labeller<limex_type>(limex));
496 for (u32 i = 0; i < state_count; i++) {
497 dumpLimDotInfo(limex, i, f);
498 dumpExDotInfo(limex, i, f);
499 }
500 dumpDotTrailer(f);
501 }
502
503 #define LIMEX_DUMP_FN(size) \
504 void nfaExecLimEx##size##_dump(const NFA *nfa, const string &base) { \
505 auto limex = (const LimExNFA##size *)getImplNfa(nfa); \
506 dumpLimexText(limex, StdioFile(base + ".txt", "w")); \
507 dumpLimexDot(nfa, limex, StdioFile(base + ".dot", "w")); \
508 }
509
510 LIMEX_DUMP_FN(32)
511 LIMEX_DUMP_FN(64)
512 LIMEX_DUMP_FN(128)
513 LIMEX_DUMP_FN(256)
514 LIMEX_DUMP_FN(384)
515 LIMEX_DUMP_FN(512)
516
517 } // namespace ue2
518