1 /*
2 * Copyright (c) 2015-2020, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include "config.h"
30
31 #include "rose_build_dump.h"
32
33 #include "rose_build_impl.h"
34 #include "rose_build_matchers.h"
35 #include "rose_internal.h"
36 #include "rose_program.h"
37 #include "ue2common.h"
38 #include "hs_compile.h"
39 #include "hwlm/hwlm_build.h"
40 #include "hwlm/hwlm_dump.h"
41 #include "hwlm/hwlm_literal.h"
42 #include "nfa/castlecompile.h"
43 #include "nfa/nfa_build_util.h"
44 #include "nfa/nfa_dump_api.h"
45 #include "nfa/nfa_internal.h"
46 #include "nfagraph/ng_dump.h"
47 #include "som/slot_manager_dump.h"
48 #include "util/compile_context.h"
49 #include "util/container.h"
50 #include "util/dump_charclass.h"
51 #include "util/dump_util.h"
52 #include "util/graph_range.h"
53 #include "util/multibit.h"
54 #include "util/multibit_build.h"
55 #include "util/ue2string.h"
56
57 #include <iomanip>
58 #include <numeric>
59 #include <ostream>
60 #include <set>
61 #include <sstream>
62 #include <string>
63 #include <vector>
64
65 #ifndef DUMP_SUPPORT
66 #error No dump support!
67 #endif
68
69 using namespace std;
70
71 namespace ue2 {
72
/**
 * \brief Render the kind of a left_id or a suffix_id as a string.
 *
 * The possible underlying engines are probed in the order graph, dfa, haig,
 * castle; the first one that is present supplies the kind. When none of them
 * is present the string "UNKNOWN" is returned.
 */
template<class Graph>
string render_kind(const Graph &g) {
    if (auto &&holder = g.graph()) {
        return to_string(holder->kind);
    }
    if (auto &&raw = g.dfa()) {
        return to_string(raw->kind);
    }
    if (auto &&som_dfa = g.haig()) {
        return to_string(som_dfa->kind);
    }
    if (auto &&proto = g.castle()) {
        return to_string(proto->kind);
    }
    return "UNKNOWN";
}
90
91 namespace {
92
93 struct rose_off {
rose_offue2::__anon85d01f500111::rose_off94 explicit rose_off(u32 j) : i(j) {}
95 string str(void) const;
96 u32 i;
97 };
98
operator <<(ostream & o,const rose_off & to)99 ostream &operator<<(ostream &o, const rose_off &to) {
100 if (to.i == ROSE_BOUND_INF) {
101 o << "inf";
102 } else {
103 o << to.i;
104 }
105 return o;
106 }
107
str(void) const108 string rose_off::str(void) const {
109 ostringstream out;
110 out << *this;
111 return out.str();
112 }
113
114 class RoseGraphWriter {
115 public:
RoseGraphWriter(const RoseBuildImpl & b_in,const map<u32,u32> & frag_map_in,const map<left_id,u32> & lqm_in,const map<suffix_id,u32> & sqm_in)116 RoseGraphWriter(const RoseBuildImpl &b_in, const map<u32, u32> &frag_map_in,
117 const map<left_id, u32> &lqm_in,
118 const map<suffix_id, u32> &sqm_in)
119 : frag_map(frag_map_in), leftfix_queue_map(lqm_in),
120 suffix_queue_map(sqm_in), build(b_in) {
121 for (const auto &m : build.ghost) {
122 ghost.insert(m.second);
123 }
124 }
125
operator ()(ostream & os,const RoseVertex & v) const126 void operator() (ostream &os, const RoseVertex &v) const {
127 const RoseGraph &g = build.g;
128
129 if (v == build.root) {
130 os << "[label=\"<root>\"]";
131 return;
132 }
133
134 if (v == build.anchored_root) {
135 os << "[label=\"<^>\"]";
136 return;
137 }
138
139 os << "[label=\"";
140 os << "index=" << g[v].index <<"\\n";
141
142 for (u32 lit_id : g[v].literals) {
143 writeLiteral(os, lit_id);
144 os << "\\n";
145 }
146
147 os << "min_offset=" << g[v].min_offset;
148 if (g[v].max_offset >= ROSE_BOUND_INF) {
149 os << ", max_offset=inf";
150 } else {
151 os << ", max_offset=" << g[v].max_offset;
152 }
153 os << "\\n";
154
155 if (!g[v].reports.empty()) {
156 if (g[v].eod_accept) {
157 os << "\\nACCEPT_EOD";
158 } else {
159 os << "\\nACCEPT";
160 }
161 os << " (rep=" << as_string_list(g[v].reports) << ")";
162 }
163
164 if (g[v].suffix) {
165 suffix_id suff(g[v].suffix);
166 os << "\\n" << render_kind(suff) << " (top " << g[v].suffix.top;
167 auto it = suffix_queue_map.find(suff);
168 if (it != end(suffix_queue_map)) {
169 os << ", queue " << it->second;
170 }
171 os << ")";
172 }
173
174 if (ghost.find(v) != ghost.end()) {
175 os << "\\nGHOST";
176 }
177
178 if (g[v].left) {
179 left_id left(g[v].left);
180 os << "\\n" << render_kind(left) << " (queue ";
181 auto it = leftfix_queue_map.find(left);
182 if (it != end(leftfix_queue_map)) {
183 os << it->second;
184 } else {
185 os << "??";
186 }
187 os << ", report " << g[v].left.leftfix_report << ")";
188 }
189
190 os << "\"";
191
192 // Roles with a rose prefix get a colour.
193 if (g[v].left) {
194 os << " color=violetred ";
195 }
196
197 // Our accepts get different colours.
198 if (!g[v].reports.empty()) {
199 os << " color=blue ";
200 }
201 if (g[v].suffix) {
202 os << " color=forestgreen ";
203 }
204
205 os << "]";
206 }
207
operator ()(ostream & os,const RoseEdge & e) const208 void operator() (ostream &os, const RoseEdge &e) const {
209 const RoseGraph &g = build.g;
210
211 // Render the bounds on this edge.
212 u32 minBound = g[e].minBound;
213 u32 maxBound = g[e].maxBound;
214
215 os << "[label=\"";
216 if (minBound == 0 && maxBound == ROSE_BOUND_INF) {
217 os << ".*";
218 } else if (minBound == 1 && maxBound == ROSE_BOUND_INF) {
219 os << ".+";
220 } else {
221 os << ".{" << minBound << ",";
222 if (maxBound != ROSE_BOUND_INF) {
223 os << maxBound;
224 }
225 os << "}";
226 }
227
228 // If we lead to an infix, display which top we're using.
229 RoseVertex v = target(e, g);
230 if (g[v].left) {
231 os << "\\nROSE TOP " << g[e].rose_top;
232 }
233
234 switch (g[e].history) {
235 case ROSE_ROLE_HISTORY_NONE:
236 break;
237 case ROSE_ROLE_HISTORY_ANCH:
238 os << "\\nANCH history";
239 break;
240 case ROSE_ROLE_HISTORY_LAST_BYTE:
241 os << "\\nLAST_BYTE history";
242 break;
243 case ROSE_ROLE_HISTORY_INVALID:
244 os << "\\nINVALID history";
245 break;
246 }
247
248 os << "\"]";
249 }
250
251 private:
252 // Render the literal associated with a vertex.
writeLiteral(ostream & os,u32 id) const253 void writeLiteral(ostream &os, u32 id) const {
254 os << "lit=" << id;
255 if (contains(frag_map, id)) {
256 os << "/" << frag_map.at(id) << " ";
257 } else {
258 os << "/nofrag ";
259 }
260
261 const auto &lit = build.literals.at(id);
262 os << '\'' << dotEscapeString(lit.s.get_string()) << '\'';
263 if (lit.s.any_nocase()) {
264 os << " (nocase)";
265 }
266 if (lit.delay) {
267 os << " +" << lit.delay;
268 }
269 }
270
271 set<RoseVertex> ghost;
272 const map<u32, u32> &frag_map;
273 const map<left_id, u32> &leftfix_queue_map;
274 const map<suffix_id, u32> &suffix_queue_map;
275 const RoseBuildImpl &build;
276 };
277
278 } // namespace
279
280 static
makeFragMap(const vector<LitFragment> & fragments)281 map<u32, u32> makeFragMap(const vector<LitFragment> &fragments) {
282 map<u32, u32> fm;
283 for (const auto &f : fragments) {
284 for (u32 id : f.lit_ids) {
285 fm[id] = f.fragment_id;
286 }
287 }
288
289 return fm;
290 }
291
292 static
dumpRoseGraph(const RoseBuildImpl & build,const RoseEngine * t,const vector<LitFragment> & fragments,const map<left_id,u32> & leftfix_queue_map,const map<suffix_id,u32> & suffix_queue_map,const char * filename)293 void dumpRoseGraph(const RoseBuildImpl &build, const RoseEngine *t,
294 const vector<LitFragment> &fragments,
295 const map<left_id, u32> &leftfix_queue_map,
296 const map<suffix_id, u32> &suffix_queue_map,
297 const char *filename) {
298 const Grey &grey = build.cc.grey;
299
300 /* "early" rose graphs should only be dumped if we are dumping intermediate
301 * graphs. Early graphs can be identified by the lack of a RoseEngine. */
302 u32 flag_test = t ? Grey::DUMP_IMPL : Grey::DUMP_INT_GRAPH;
303
304 if (!(grey.dumpFlags & flag_test)) {
305 return;
306 }
307
308 stringstream ss;
309 ss << grey.dumpPath << filename;
310
311 DEBUG_PRINTF("dumping graph to %s\n", ss.str().c_str());
312 ofstream os(ss.str());
313
314 auto frag_map = makeFragMap(fragments);
315 RoseGraphWriter writer(build, frag_map, leftfix_queue_map, suffix_queue_map);
316 writeGraphviz(os, build.g, writer, get(boost::vertex_index, build.g));
317 }
318
dumpRoseGraph(const RoseBuildImpl & build,const char * filename)319 void dumpRoseGraph(const RoseBuildImpl &build, const char *filename) {
320 dumpRoseGraph(build, nullptr, {}, {}, {}, filename);
321 }
322
323 namespace {
324 struct CompareVertexRole {
CompareVertexRoleue2::__anon85d01f500211::CompareVertexRole325 explicit CompareVertexRole(const RoseGraph &g_in) : g(g_in) {}
operator ()ue2::__anon85d01f500211::CompareVertexRole326 inline bool operator()(const RoseVertex &a, const RoseVertex &b) const {
327 return g[a].index < g[b].index;
328 }
329 private:
330 const RoseGraph &g;
331 };
332 }
333
334 static
lit_graph_info(const RoseBuildImpl & build,const rose_literal_info & li,u32 * min_offset,bool * in_root_role)335 void lit_graph_info(const RoseBuildImpl &build, const rose_literal_info &li,
336 u32 *min_offset, bool *in_root_role) {
337 *min_offset = ~0U;
338 *in_root_role = false;
339 for (auto v : li.vertices) {
340 *in_root_role |= build.isRootSuccessor(v);
341
342 LIMIT_TO_AT_MOST(min_offset, build.g[v].min_offset);
343 }
344 }
345
346 static
dumpRoseLiterals(const RoseBuildImpl & build,const vector<LitFragment> & fragments,const Grey & grey)347 void dumpRoseLiterals(const RoseBuildImpl &build,
348 const vector<LitFragment> &fragments,
349 const Grey &grey) {
350 const RoseGraph &g = build.g;
351 map<u32, u32> frag_map = makeFragMap(fragments);
352
353 DEBUG_PRINTF("dumping literals\n");
354 ofstream os(grey.dumpPath + "rose_literals.txt");
355
356 os << "ROSE LITERALS: a total of " << build.literals.size()
357 << " literals and " << num_vertices(g) << " roles." << endl
358 << endl;
359
360 for (u32 id = 0; id < build.literals.size(); id++) {
361 const auto &lit = build.literals.at(id);
362 const ue2_literal &s = lit.s;
363 const rose_literal_info &lit_info = build.literal_info[id];
364
365 switch (lit.table) {
366 case ROSE_ANCHORED:
367 os << "ANCHORED";
368 break;
369 case ROSE_FLOATING:
370 os << "FLOATING";
371 break;
372 case ROSE_EOD_ANCHORED:
373 os << "EOD-ANCHORED";
374 break;
375 case ROSE_ANCHORED_SMALL_BLOCK:
376 os << "SMALL-BLOCK";
377 break;
378 case ROSE_EVENT:
379 os << "EVENT";
380 break;
381 }
382
383 os << " ID " << id;
384 if (contains(frag_map, id)) {
385 os << "/" << frag_map.at(id);
386 }
387 os << ": \"" << escapeString(s.get_string()) << "\""
388 << " (len " << s.length() << ",";
389 if (s.any_nocase()) {
390 os << " nocase,";
391 }
392 if (lit_info.requires_benefits) {
393 os << " benefits,";
394 }
395
396 if (lit.delay) {
397 os << " delayed "<< lit.delay << ",";
398 }
399
400 os << " groups 0x" << hex << setw(16) << setfill('0')
401 << lit_info.group_mask << dec << ",";
402
403 if (lit_info.squash_group) {
404 os << " squashes group,";
405 }
406
407 u32 min_offset;
408 bool in_root_role;
409 lit_graph_info(build, lit_info, &min_offset, &in_root_role);
410 os << " min offset " << min_offset;
411 if (in_root_role) {
412 os << " root literal";
413 }
414
415 os << ") roles=" << lit_info.vertices.size() << endl;
416
417 if (!lit_info.delayed_ids.empty()) {
418 os << " Children:";
419 for (u32 d_id : lit_info.delayed_ids) {
420 os << " " << d_id;
421 }
422 os << endl;
423 }
424
425 // Temporary vector, so that we can sort the output by role.
426 vector<RoseVertex> verts(lit_info.vertices.begin(),
427 lit_info.vertices.end());
428 sort(verts.begin(), verts.end(), CompareVertexRole(g));
429
430 for (RoseVertex v : verts) {
431 // role info
432 os << " Index " << g[v].index << ": groups=0x" << hex << setw(16)
433 << setfill('0') << g[v].groups << dec;
434
435 if (g[v].reports.empty()) {
436 os << ", report=NONE";
437 } else {
438 os << ", report={" << as_string_list(g[v].reports) << "}";
439 }
440
441 os << ", min_offset=" << g[v].min_offset;
442 os << ", max_offset=" << g[v].max_offset << endl;
443 // pred info
444 for (const auto &ie : in_edges_range(v, g)) {
445 const auto &u = source(ie, g);
446 os << " Predecessor index=";
447 if (u == build.root) {
448 os << "ROOT";
449 } else if (u == build.anchored_root) {
450 os << "ANCHORED_ROOT";
451 } else {
452 os << g[u].index;
453 }
454 os << ": bounds [" << g[ie].minBound << ", ";
455 if (g[ie].maxBound == ROSE_BOUND_INF) {
456 os << "inf";
457 } else {
458 os << g[ie].maxBound;
459 }
460 os << "]" << endl;
461 }
462 }
463 }
464
465 os.close();
466 }
467
468 template<class Iter>
469 static
toHex(Iter i,const Iter & end)470 string toHex(Iter i, const Iter &end) {
471 ostringstream oss;
472 for (; i != end; ++i) {
473 u8 c = *i;
474 oss << hex << setw(2) << setfill('0') << ((unsigned)c & 0xff);
475 }
476 return oss.str();
477 }
478
/**
 * \brief Return true if \p c is one of the characters that toRegex() escapes
 * with a backslash.
 */
static
bool isMetaChar(char c) {
    // Deliberately a plain char array rather than a string literal: there is
    // no terminating NUL, so '\0' is not reported as a metacharacter.
    static const char metachars[] = {'#', '$', '(',  ')', '*', '+', '.', '/',
                                     '?', '[', '\\', ']', '^', '{', '|', '}'};
    for (const char m : metachars) {
        if (c == m) {
            return true;
        }
    }
    return false;
}
503
504 static
toRegex(const string & lit)505 string toRegex(const string &lit) {
506 ostringstream os;
507 for (char c : lit) {
508 if (0x20 <= c && c <= 0x7e) {
509 if (isMetaChar(c)) {
510 os << "\\" << c;
511 } else {
512 os << c;
513 }
514 } else if (c == '\n') {
515 os << "\\n";
516 } else if (c == '\r') {
517 os << "\\r";
518 } else if (c == '\t') {
519 os << "\\t";
520 } else {
521 os << "\\x" << hex << setw(2) << setfill('0')
522 << (unsigned)(c & 0xff) << dec;
523 }
524 }
525 return os.str();
526 }
527
dumpMatcherLiterals(const vector<hwlmLiteral> & lits,const string & name,const Grey & grey)528 void dumpMatcherLiterals(const vector<hwlmLiteral> &lits, const string &name,
529 const Grey &grey) {
530 if (!grey.dumpFlags) {
531 return;
532 }
533
534 ofstream of(grey.dumpPath + "rose_" + name + "_test_literals.txt");
535
536 // Unique regex index, as literals may share an ID.
537 u32 i = 0;
538
539 for (const hwlmLiteral &lit : lits) {
540 // First, detail in a comment.
541 of << "# id=" << lit.id;
542 if (!lit.msk.empty()) {
543 of << " msk=0x" << toHex(lit.msk.begin(), lit.msk.end());
544 of << " cmp=0x" << toHex(lit.cmp.begin(), lit.cmp.end());
545 }
546 of << " groups=0x" << hex << setfill('0') << lit.groups << dec;
547 if (lit.noruns) {
548 of << " noruns";
549 }
550 of << endl;
551
552 // Second, literal rendered as a regex.
553 of << i << ":/" << toRegex(lit.s) << (lit.nocase ? "/i" : "/");
554
555 of << endl;
556
557 i++;
558 }
559
560 of.close();
561 }
562
563 static
loadFromByteCodeOffset(const RoseEngine * t,u32 offset)564 const void *loadFromByteCodeOffset(const RoseEngine *t, u32 offset) {
565 if (!offset) {
566 return nullptr;
567 }
568
569 const char *lt = (const char *)t + offset;
570 return lt;
571 }
572
573 static
getAnchoredMatcher(const RoseEngine * t)574 const void *getAnchoredMatcher(const RoseEngine *t) {
575 return loadFromByteCodeOffset(t, t->amatcherOffset);
576 }
577
578 static
getFloatingMatcher(const RoseEngine * t)579 const HWLM *getFloatingMatcher(const RoseEngine *t) {
580 return (const HWLM *)loadFromByteCodeOffset(t, t->fmatcherOffset);
581 }
582
583 static
getDelayRebuildMatcher(const RoseEngine * t)584 const HWLM *getDelayRebuildMatcher(const RoseEngine *t) {
585 return (const HWLM *)loadFromByteCodeOffset(t, t->drmatcherOffset);
586 }
587
588 static
getEodMatcher(const RoseEngine * t)589 const HWLM *getEodMatcher(const RoseEngine *t) {
590 return (const HWLM *)loadFromByteCodeOffset(t, t->ematcherOffset);
591 }
592
593 static
getSmallBlockMatcher(const RoseEngine * t)594 const HWLM *getSmallBlockMatcher(const RoseEngine *t) {
595 return (const HWLM *)loadFromByteCodeOffset(t, t->sbmatcherOffset);
596 }
597
598 static
bitvectorToReach(const u8 * reach)599 CharReach bitvectorToReach(const u8 *reach) {
600 CharReach cr;
601
602 for (size_t i = 0; i < N_CHARS; i++) {
603 if (reach[i / 8] & (1U << (i % 8))) {
604 cr.set(i);
605 }
606 }
607 return cr;
608 }
609
610 static
multiBitvectorToReach(const u8 * reach,u8 path_mask)611 CharReach multiBitvectorToReach(const u8 *reach, u8 path_mask) {
612 CharReach cr;
613 for (size_t i = 0; i < N_CHARS; i++) {
614 if (reach[i] & path_mask) {
615 cr.set(i);
616 }
617 }
618 return cr;
619 }
620
621 static
dumpLookaround(ofstream & os,const RoseEngine * t,const ROSE_STRUCT_CHECK_LOOKAROUND * ri)622 void dumpLookaround(ofstream &os, const RoseEngine *t,
623 const ROSE_STRUCT_CHECK_LOOKAROUND *ri) {
624 assert(ri);
625
626 const u8 *base = (const u8 *)t;
627
628 const s8 *look = (const s8 *)base + ri->look_index;
629 const s8 *look_end = look + ri->count;
630 const u8 *reach = base + ri->reach_index;
631
632 os << " contents:" << endl;
633
634 for (; look < look_end; look++, reach += REACH_BITVECTOR_LEN) {
635 os << " " << std::setw(4) << std::setfill(' ') << int{*look}
636 << ": ";
637 describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT);
638 os << endl;
639 }
640 }
641
642 static
dumpMultipathLookaround(ofstream & os,const RoseEngine * t,const ROSE_STRUCT_MULTIPATH_LOOKAROUND * ri)643 void dumpMultipathLookaround(ofstream &os, const RoseEngine *t,
644 const ROSE_STRUCT_MULTIPATH_LOOKAROUND *ri) {
645 assert(ri);
646
647 const u8 *base = (const u8 *)t;
648
649 const s8 *look_begin = (const s8 *)base + ri->look_index;
650 const s8 *look_end = look_begin + ri->count;
651 const u8 *reach_begin = base + ri->reach_index;
652
653 os << " contents:" << endl;
654
655 u32 path_mask = ri->start_mask[0];
656 while (path_mask) {
657 u32 path = findAndClearLSB_32(&path_mask);
658 os << " Path #" << path << ":" << endl;
659 os << " ";
660
661 const s8 *look = look_begin;
662 const u8 *reach = reach_begin;
663 for (; look < look_end; look++, reach += MULTI_REACH_BITVECTOR_LEN) {
664 CharReach cr = multiBitvectorToReach(reach, 1U << path);
665 if (cr.any() && !cr.all()) {
666 os << "<" << int(*look) << ": ";
667 describeClass(os, cr, 1000, CC_OUT_TEXT);
668 os << "> ";
669 }
670 }
671 os << endl;
672 }
673 }
674
675 static
sparseIterValues(const mmbit_sparse_iter * it,u32 num_bits)676 vector<u32> sparseIterValues(const mmbit_sparse_iter *it, u32 num_bits) {
677 vector<u32> keys;
678
679 if (num_bits == 0) {
680 return keys;
681 }
682
683 // Populate a multibit structure with all-ones. Note that the multibit
684 // runtime assumes that it is always safe to read 8 bytes, so we must
685 // over-allocate for smaller sizes.
686 const size_t num_bytes = mmbit_size(num_bits);
687 vector<u8> bits(max(size_t{8}, num_bytes), u8{0xff}); // All bits on.
688 const u8 *b = bits.data();
689 if (num_bytes < 8) {
690 b += 8 - num_bytes;
691 }
692
693 vector<mmbit_sparse_state> state(MAX_SPARSE_ITER_STATES);
694 mmbit_sparse_state *s = state.data();
695
696 u32 idx = 0;
697 u32 i = mmbit_sparse_iter_begin(b, num_bits, &idx, it, s);
698 while (i != MMB_INVALID) {
699 keys.push_back(i);
700 i = mmbit_sparse_iter_next(b, num_bits, i, &idx, it, s);
701 }
702
703 return keys;
704 }
705
706 static
dumpJumpTable(ofstream & os,const RoseEngine * t,const ROSE_STRUCT_SPARSE_ITER_BEGIN * ri)707 void dumpJumpTable(ofstream &os, const RoseEngine *t,
708 const ROSE_STRUCT_SPARSE_ITER_BEGIN *ri) {
709 auto *it =
710 (const mmbit_sparse_iter *)loadFromByteCodeOffset(t, ri->iter_offset);
711 auto *jumps = (const u32 *)loadFromByteCodeOffset(t, ri->jump_table);
712
713 for (const auto &key : sparseIterValues(it, t->rolesWithStateCount)) {
714 os << " " << std::setw(4) << std::setfill(' ') << key << " : +"
715 << *jumps << endl;
716 ++jumps;
717 }
718 }
719
720 static
dumpSomOperation(ofstream & os,const som_operation & op)721 void dumpSomOperation(ofstream &os, const som_operation &op) {
722 os << " som (type=" << u32{op.type} << ", onmatch=" << op.onmatch;
723 switch (op.type) {
724 case SOM_EXTERNAL_CALLBACK_REV_NFA:
725 case SOM_INTERNAL_LOC_SET_REV_NFA:
726 case SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET:
727 case SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE:
728 os << ", revNfaIndex=" << op.aux.revNfaIndex;
729 break;
730 default:
731 os << ", somDistance=" << op.aux.somDistance;
732 break;
733 }
734 os << ")" << endl;
735 }
736
737 static
dumpStrMask(const u8 * mask,size_t len)738 string dumpStrMask(const u8 *mask, size_t len) {
739 ostringstream oss;
740 for (size_t i = 0; i < len; i++) {
741 oss << std::hex << std::setw(2) << std::setfill('0') << u32{mask[i]}
742 << " ";
743 }
744 return oss.str();
745 }
746
747 static
shufti2cr(const u8 * lo,const u8 * hi,u8 bucket_mask)748 CharReach shufti2cr(const u8 *lo, const u8 *hi, u8 bucket_mask) {
749 CharReach cr;
750 for (u32 i = 0; i < N_CHARS; i++) {
751 if(lo[i & 0xf] & hi[i >> 4] & bucket_mask) {
752 cr.set(i);
753 }
754 }
755 return cr;
756 }
757
758 static
dumpLookaroundShufti(ofstream & os,u32 len,const u8 * lo,const u8 * hi,const u8 * bucket_mask,u64a neg_mask,s32 offset)759 void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
760 const u8 *bucket_mask, u64a neg_mask, s32 offset) {
761 assert(len == 16 || len == 32 || len == 64);
762 os << " contents:" << endl;
763 for (u32 idx = 0; idx < len; idx++) {
764 CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
765 if (neg_mask & (1ULL << idx)) {
766 cr.flip();
767 }
768
769 if (cr.any() && !cr.all()) {
770 os << " " << std::setw(4) << std::setfill(' ')
771 << int(offset + idx) << ": ";
772 describeClass(os, cr, 1000, CC_OUT_TEXT);
773 os << endl;
774 }
775 }
776 }
777
778 static
dumpLookaroundShufti(ofstream & os,u32 len,const u8 * lo,const u8 * hi,const u8 * lo_2,const u8 * hi_2,const u8 * bucket_mask,const u8 * bucket_mask_2,u64a neg_mask,s32 offset)779 void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
780 const u8 *lo_2, const u8 *hi_2, const u8 *bucket_mask,
781 const u8 *bucket_mask_2, u64a neg_mask, s32 offset) {
782 assert(len == 16 || len == 32 || len == 64);
783 os << " contents:" << endl;
784 for (u32 idx = 0; idx < len; idx++) {
785 CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
786 cr |= shufti2cr(lo_2, hi_2, bucket_mask_2[idx]);
787 if (neg_mask & (1ULL << idx)) {
788 cr.flip();
789 }
790
791 if (cr.any() && !cr.all()) {
792 os << " " << std::setw(4) << std::setfill(' ')
793 << int(offset + idx) << ": ";
794 describeClass(os, cr, 1000, CC_OUT_TEXT);
795 os << endl;
796 }
797 }
798 }
799
800 static
dumpMultipathShufti(ofstream & os,u32 len,const u8 * lo,const u8 * hi,const u8 * bucket_mask,const u8 * data_offset,u64a neg_mask,s32 base_offset)801 void dumpMultipathShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
802 const u8 *bucket_mask, const u8 *data_offset,
803 u64a neg_mask, s32 base_offset) {
804 assert(len == 16 || len == 32 || len == 64);
805 os << " contents:" << endl;
806 u32 path = 0;
807 for (u32 idx = 0; idx < len; idx++) {
808 CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
809
810 if (neg_mask & (1ULL << idx)) {
811 cr.flip();
812 }
813
814 if (cr.any() && !cr.all()) {
815 if (idx == 0 || data_offset[idx - 1] > data_offset[idx]) {
816 path++;
817 if (idx) {
818 os << endl;
819 }
820 os << " Path #" << path << ":" << endl;
821 os << " ";
822 }
823
824 os << "<" << int(base_offset + data_offset[idx]) << ": ";
825 describeClass(os, cr, 1000, CC_OUT_TEXT);
826 os << "> ";
827 }
828 }
829 os << endl;
830 }
831
832 static
dumpMultipathShufti(ofstream & os,u32 len,const u8 * lo,const u8 * hi,const u8 * lo_2,const u8 * hi_2,const u8 * bucket_mask,const u8 * bucket_mask_2,const u8 * data_offset,u32 neg_mask,s32 base_offset)833 void dumpMultipathShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
834 const u8 *lo_2, const u8 *hi_2, const u8 *bucket_mask,
835 const u8 *bucket_mask_2, const u8 *data_offset,
836 u32 neg_mask, s32 base_offset) {
837 assert(len == 16 || len == 32 || len == 64);
838 os << " contents:";
839 u32 path = 0;
840 for (u32 idx = 0; idx < len; idx++) {
841 CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
842 cr |= shufti2cr(lo_2, hi_2, bucket_mask_2[idx]);
843
844 if (neg_mask & (1ULL << idx)) {
845 cr.flip();
846 }
847
848 if (cr.any() && !cr.all()) {
849 if (idx == 0 || data_offset[idx - 1] > data_offset[idx]) {
850 path++;
851 os << endl;
852 os << " Path #" << path << ":" << endl;
853 os << " ";
854 }
855
856 os << "<" << int(base_offset + data_offset[idx]) << ": ";
857 describeClass(os, cr, 1000, CC_OUT_TEXT);
858 os << "> ";
859 }
860 }
861 os << endl;
862 }
863
/* Opens the switch case for instruction 'name' inside dumpProgram(): prints
 * the instruction's offset from pc_base and its name, then declares 'ri' as a
 * pointer to the ROSE_STRUCT_<name> located at pc. The braced block opened
 * here is left open; PROGRAM_NEXT_INSTRUCTION closes it. */
#define PROGRAM_CASE(name) \
    case ROSE_INSTR_##name: { \
        os << " " << std::setw(4) << std::setfill('0') << (pc - pc_base) \
           << ": " #name "\n"; \
        const auto *ri = (const struct ROSE_STRUCT_##name *)pc;

/* Closes a case opened by PROGRAM_CASE: advances pc by the size of *ri
 * passed through ROUNDUP_N with ROSE_INSTR_MIN_ALIGN, breaks out of the
 * switch and closes the case's braced block. */
#define PROGRAM_NEXT_INSTRUCTION \
    pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \
    break; \
    }
874
875
876 static
dumpProgram(ofstream & os,const RoseEngine * t,const char * pc)877 void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
878 const char *pc_base = pc;
879 for (;;) {
880 u8 code = *(const u8 *)pc;
881 assert(code <= LAST_ROSE_INSTRUCTION);
882 const size_t offset = pc - pc_base;
883 switch (code) {
884 PROGRAM_CASE(END) { return; }
885 PROGRAM_NEXT_INSTRUCTION
886
887 PROGRAM_CASE(ANCHORED_DELAY) {
888 os << " groups 0x" << std::hex << ri->groups << std::dec
889 << endl;
890 os << " anch_id " << ri->anch_id << "\n";
891 os << " done_jump " << offset + ri->done_jump << endl;
892 }
893 PROGRAM_NEXT_INSTRUCTION
894
895 PROGRAM_CASE(CHECK_LIT_EARLY) {
896 os << " min_offset " << ri->min_offset << endl;
897 os << " fail_jump " << offset + ri->fail_jump << endl;
898 }
899 PROGRAM_NEXT_INSTRUCTION
900
901 PROGRAM_CASE(CHECK_GROUPS) {
902 os << " groups 0x" << std::hex << ri->groups << std::dec
903 << endl;
904 }
905 PROGRAM_NEXT_INSTRUCTION
906
907 PROGRAM_CASE(CHECK_ONLY_EOD) {
908 os << " fail_jump " << offset + ri->fail_jump << endl;
909 }
910 PROGRAM_NEXT_INSTRUCTION
911
912 PROGRAM_CASE(CHECK_BOUNDS) {
913 os << " min_bound " << ri->min_bound << endl;
914 os << " max_bound " << ri->max_bound << endl;
915 os << " fail_jump " << offset + ri->fail_jump << endl;
916 }
917 PROGRAM_NEXT_INSTRUCTION
918
919 PROGRAM_CASE(CHECK_NOT_HANDLED) {
920 os << " key " << ri->key << endl;
921 os << " fail_jump " << offset + ri->fail_jump << endl;
922 }
923 PROGRAM_NEXT_INSTRUCTION
924
925 PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) {
926 os << " offset " << int{ri->offset} << endl;
927 os << " reach_index " << ri->reach_index << endl;
928 os << " fail_jump " << offset + ri->fail_jump << endl;
929 const u8 *reach = (const u8 *)t + ri->reach_index;
930 os << " contents ";
931 describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT);
932 os << endl;
933 }
934 PROGRAM_NEXT_INSTRUCTION
935
936 PROGRAM_CASE(CHECK_LOOKAROUND) {
937 os << " look_index " << ri->look_index << endl;
938 os << " reach_index " << ri->reach_index << endl;
939 os << " count " << ri->count << endl;
940 os << " fail_jump " << offset + ri->fail_jump << endl;
941 dumpLookaround(os, t, ri);
942 }
943 PROGRAM_NEXT_INSTRUCTION
944
945 PROGRAM_CASE(CHECK_MASK) {
946 os << " and_mask 0x" << std::hex << std::setw(16)
947 << std::setfill('0') << ri->and_mask << std::dec << endl;
948 os << " cmp_mask 0x" << std::hex << std::setw(16)
949 << std::setfill('0') << ri->cmp_mask << std::dec << endl;
950 os << " neg_mask 0x" << std::hex << std::setw(16)
951 << std::setfill('0') << ri->neg_mask << std::dec << endl;
952 os << " offset " << ri->offset << endl;
953 os << " fail_jump " << offset + ri->fail_jump << endl;
954 }
955 PROGRAM_NEXT_INSTRUCTION
956
957 PROGRAM_CASE(CHECK_MASK_32) {
958 os << " and_mask "
959 << dumpStrMask(ri->and_mask, sizeof(ri->and_mask))
960 << endl;
961 os << " cmp_mask "
962 << dumpStrMask(ri->cmp_mask, sizeof(ri->cmp_mask))
963 << endl;
964 os << " neg_mask 0x" << std::hex << std::setw(8)
965 << std::setfill('0') << ri->neg_mask << std::dec << endl;
966 os << " offset " << ri->offset << endl;
967 os << " fail_jump " << offset + ri->fail_jump << endl;
968 }
969 PROGRAM_NEXT_INSTRUCTION
970
971 PROGRAM_CASE(CHECK_MASK_64) {
972 os << " and_mask "
973 << dumpStrMask(ri->and_mask, sizeof(ri->and_mask))
974 << endl;
975 os << " cmp_mask "
976 << dumpStrMask(ri->cmp_mask, sizeof(ri->cmp_mask))
977 << endl;
978 os << " neg_mask 0x" << std::hex << std::setw(8)
979 << std::setfill('0') << ri->neg_mask << std::dec << endl;
980 os << " offset " << ri->offset << endl;
981 os << " fail_jump " << offset + ri->fail_jump << endl;
982 }
983 PROGRAM_NEXT_INSTRUCTION
984
985 PROGRAM_CASE(CHECK_BYTE) {
986 os << " and_mask 0x" << std::hex << std::setw(2)
987 << std::setfill('0') << u32{ri->and_mask} << std::dec
988 << endl;
989 os << " cmp_mask 0x" << std::hex << std::setw(2)
990 << std::setfill('0') << u32{ri->cmp_mask} << std::dec
991 << endl;
992 os << " negation " << u32{ri->negation} << endl;
993 os << " offset " << ri->offset << endl;
994 os << " fail_jump " << offset + ri->fail_jump << endl;
995 }
996 PROGRAM_NEXT_INSTRUCTION
997
998 PROGRAM_CASE(CHECK_SHUFTI_16x8) {
999 os << " nib_mask "
1000 << dumpStrMask(ri->nib_mask, sizeof(ri->nib_mask))
1001 << endl;
1002 os << " bucket_select_mask "
1003 << dumpStrMask(ri->bucket_select_mask,
1004 sizeof(ri->bucket_select_mask))
1005 << endl;
1006 os << " neg_mask 0x" << std::hex << std::setw(8)
1007 << std::setfill('0') << ri->neg_mask << std::dec << endl;
1008 os << " offset " << ri->offset << endl;
1009 os << " fail_jump " << offset + ri->fail_jump << endl;
1010 dumpLookaroundShufti(os, 16, ri->nib_mask, ri->nib_mask + 16,
1011 ri->bucket_select_mask, ri->neg_mask,
1012 ri->offset);
1013 }
1014 PROGRAM_NEXT_INSTRUCTION
1015
1016 PROGRAM_CASE(CHECK_SHUFTI_32x8) {
1017 os << " hi_mask "
1018 << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
1019 << endl;
1020 os << " lo_mask "
1021 << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
1022 << endl;
1023 os << " bucket_select_mask "
1024 << dumpStrMask(ri->bucket_select_mask,
1025 sizeof(ri->bucket_select_mask))
1026 << endl;
1027 os << " neg_mask 0x" << std::hex << std::setw(8)
1028 << std::setfill('0') << ri->neg_mask << std::dec << endl;
1029 os << " offset " << ri->offset << endl;
1030 os << " fail_jump " << offset + ri->fail_jump << endl;
1031 dumpLookaroundShufti(os, 32, ri->lo_mask, ri->hi_mask,
1032 ri->bucket_select_mask, ri->neg_mask,
1033 ri->offset);
1034 }
1035 PROGRAM_NEXT_INSTRUCTION
1036
1037 PROGRAM_CASE(CHECK_SHUFTI_16x16) {
1038 os << " hi_mask "
1039 << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
1040 << endl;
1041 os << " lo_mask "
1042 << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
1043 << endl;
1044 os << " bucket_select_mask "
1045 << dumpStrMask(ri->bucket_select_mask,
1046 sizeof(ri->bucket_select_mask))
1047 << endl;
1048 os << " neg_mask 0x" << std::hex << std::setw(8)
1049 << std::setfill('0') << ri->neg_mask << std::dec << endl;
1050 os << " offset " << ri->offset << endl;
1051 os << " fail_jump " << offset + ri->fail_jump << endl;
1052 dumpLookaroundShufti(os, 16, ri->lo_mask, ri->hi_mask,
1053 ri->lo_mask + 16, ri->hi_mask + 16,
1054 ri->bucket_select_mask,
1055 ri->bucket_select_mask + 16,
1056 ri->neg_mask, ri->offset);
1057 }
1058 PROGRAM_NEXT_INSTRUCTION
1059
1060 PROGRAM_CASE(CHECK_SHUFTI_32x16) {
1061 os << " hi_mask "
1062 << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
1063 << endl;
1064 os << " lo_mask "
1065 << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
1066 << endl;
1067 os << " bucket_select_mask_hi "
1068 << dumpStrMask(ri->bucket_select_mask_hi,
1069 sizeof(ri->bucket_select_mask_hi))
1070 << endl;
1071 os << " bucket_select_mask_lo "
1072 << dumpStrMask(ri->bucket_select_mask_lo,
1073 sizeof(ri->bucket_select_mask_lo))
1074 << endl;
1075 os << " neg_mask 0x" << std::hex << std::setw(8)
1076 << std::setfill('0') << ri->neg_mask << std::dec << endl;
1077 os << " offset " << ri->offset << endl;
1078 os << " fail_jump " << offset + ri->fail_jump << endl;
1079 dumpLookaroundShufti(os, 32, ri->lo_mask, ri->hi_mask,
1080 ri->lo_mask + 16, ri->hi_mask + 16,
1081 ri->bucket_select_mask_lo,
1082 ri->bucket_select_mask_hi,
1083 ri->neg_mask, ri->offset);
1084 }
1085 PROGRAM_NEXT_INSTRUCTION
1086
1087 PROGRAM_CASE(CHECK_SHUFTI_64x8) {
1088 os << " hi_mask "
1089 << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
1090 << endl;
1091 os << " lo_mask "
1092 << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
1093 << endl;
1094 os << " bucket_select_mask "
1095 << dumpStrMask(ri->bucket_select_mask,
1096 sizeof(ri->bucket_select_mask))
1097 << endl;
1098 os << " neg_mask 0x" << std::hex << std::setw(8)
1099 << std::setfill('0') << ri->neg_mask << std::dec << endl;
1100 os << " offset " << ri->offset << endl;
1101 os << " fail_jump " << offset + ri->fail_jump << endl;
1102 dumpLookaroundShufti(os, 64, ri->lo_mask, ri->hi_mask,
1103 ri->bucket_select_mask, ri->neg_mask,
1104 ri->offset);
1105 }
1106 PROGRAM_NEXT_INSTRUCTION
1107
1108 PROGRAM_CASE(CHECK_SHUFTI_64x16) {
1109 os << " hi_mask_1 "
1110 << dumpStrMask(ri->hi_mask_1, sizeof(ri->hi_mask_1))
1111 << endl;
1112 os << " hi_mask_2 "
1113 << dumpStrMask(ri->hi_mask_2, sizeof(ri->hi_mask_2))
1114 << endl;
1115 os << " lo_mask_1 "
1116 << dumpStrMask(ri->lo_mask_1, sizeof(ri->lo_mask_1))
1117 << endl;
1118 os << " lo_mask_2 "
1119 << dumpStrMask(ri->lo_mask_2, sizeof(ri->lo_mask_2))
1120 << endl;
1121 os << " bucket_select_mask_hi "
1122 << dumpStrMask(ri->bucket_select_mask_hi,
1123 sizeof(ri->bucket_select_mask_hi))
1124 << endl;
1125 os << " bucket_select_mask_lo "
1126 << dumpStrMask(ri->bucket_select_mask_lo,
1127 sizeof(ri->bucket_select_mask_lo))
1128 << endl;
1129 os << " neg_mask 0x" << std::hex << std::setw(8)
1130 << std::setfill('0') << ri->neg_mask << std::dec << endl;
1131 os << " offset " << ri->offset << endl;
1132 os << " fail_jump " << offset + ri->fail_jump << endl;
1133 dumpLookaroundShufti(os, 64, ri->lo_mask_1, ri->hi_mask_1,
1134 ri->lo_mask_2, ri->hi_mask_2,
1135 ri->bucket_select_mask_lo,
1136 ri->bucket_select_mask_hi,
1137 ri->neg_mask, ri->offset);
1138 }
1139 PROGRAM_NEXT_INSTRUCTION
1140
1141 PROGRAM_CASE(CHECK_INFIX) {
1142 os << " queue " << ri->queue << endl;
1143 os << " lag " << ri->lag << endl;
1144 os << " report " << ri->report << endl;
1145 os << " fail_jump " << offset + ri->fail_jump << endl;
1146 }
1147 PROGRAM_NEXT_INSTRUCTION
1148
1149 PROGRAM_CASE(CHECK_PREFIX) {
1150 os << " queue " << ri->queue << endl;
1151 os << " lag " << ri->lag << endl;
1152 os << " report " << ri->report << endl;
1153 os << " fail_jump " << offset + ri->fail_jump << endl;
1154 }
1155 PROGRAM_NEXT_INSTRUCTION
1156
1157 PROGRAM_CASE(PUSH_DELAYED) {
1158 os << " delay " << u32{ri->delay} << endl;
1159 os << " index " << ri->index << endl;
1160 }
1161 PROGRAM_NEXT_INSTRUCTION
1162
1163 PROGRAM_CASE(DUMMY_NOP) {}
1164 PROGRAM_NEXT_INSTRUCTION
1165
1166 PROGRAM_CASE(CATCH_UP) {}
1167 PROGRAM_NEXT_INSTRUCTION
1168
1169 PROGRAM_CASE(CATCH_UP_MPV) {}
1170 PROGRAM_NEXT_INSTRUCTION
1171
1172 PROGRAM_CASE(SOM_ADJUST) {
1173 os << " distance " << ri->distance << endl;
1174 }
1175 PROGRAM_NEXT_INSTRUCTION
1176
1177 PROGRAM_CASE(SOM_LEFTFIX) {
1178 os << " queue " << ri->queue << endl;
1179 os << " lag " << ri->lag << endl;
1180 }
1181 PROGRAM_NEXT_INSTRUCTION
1182
1183 PROGRAM_CASE(SOM_FROM_REPORT) {
1184 dumpSomOperation(os, ri->som);
1185 }
1186 PROGRAM_NEXT_INSTRUCTION
1187
1188 PROGRAM_CASE(SOM_ZERO) {}
1189 PROGRAM_NEXT_INSTRUCTION
1190
1191 PROGRAM_CASE(TRIGGER_INFIX) {
1192 os << " queue " << ri->queue << endl;
1193 os << " event " << ri->event << endl;
1194 os << " cancel " << u32{ri->cancel} << endl;
1195 }
1196 PROGRAM_NEXT_INSTRUCTION
1197
1198 PROGRAM_CASE(TRIGGER_SUFFIX) {
1199 os << " queue " << ri->queue << endl;
1200 os << " event " << ri->event << endl;
1201 }
1202 PROGRAM_NEXT_INSTRUCTION
1203
1204 PROGRAM_CASE(DEDUPE) {
1205 os << " quash_som " << u32{ri->quash_som} << endl;
1206 os << " dkey " << ri->dkey << endl;
1207 os << " offset_adjust " << ri->offset_adjust << endl;
1208 os << " fail_jump " << offset + ri->fail_jump << endl;
1209 }
1210 PROGRAM_NEXT_INSTRUCTION
1211
1212 PROGRAM_CASE(DEDUPE_SOM) {
1213 os << " quash_som " << u32{ri->quash_som} << endl;
1214 os << " dkey " << ri->dkey << endl;
1215 os << " offset_adjust " << ri->offset_adjust << endl;
1216 os << " fail_jump " << offset + ri->fail_jump << endl;
1217 }
1218 PROGRAM_NEXT_INSTRUCTION
1219
1220 PROGRAM_CASE(REPORT_CHAIN) {
1221 os << " event " << ri->event << endl;
1222 os << " top_squash_distance " << ri->top_squash_distance
1223 << endl;
1224 }
1225 PROGRAM_NEXT_INSTRUCTION
1226
1227 PROGRAM_CASE(REPORT_SOM_INT) {
1228 dumpSomOperation(os, ri->som);
1229 }
1230 PROGRAM_NEXT_INSTRUCTION
1231
1232 PROGRAM_CASE(REPORT_SOM_AWARE) {
1233 dumpSomOperation(os, ri->som);
1234 }
1235 PROGRAM_NEXT_INSTRUCTION
1236
1237 PROGRAM_CASE(REPORT) {
1238 os << " onmatch " << ri->onmatch << endl;
1239 os << " offset_adjust " << ri->offset_adjust << endl;
1240 }
1241 PROGRAM_NEXT_INSTRUCTION
1242
1243 PROGRAM_CASE(REPORT_EXHAUST) {
1244 os << " onmatch " << ri->onmatch << endl;
1245 os << " offset_adjust " << ri->offset_adjust << endl;
1246 os << " ekey " << ri->ekey << endl;
1247 }
1248 PROGRAM_NEXT_INSTRUCTION
1249
1250 PROGRAM_CASE(REPORT_SOM) {
1251 os << " onmatch " << ri->onmatch << endl;
1252 os << " offset_adjust " << ri->offset_adjust << endl;
1253 }
1254 PROGRAM_NEXT_INSTRUCTION
1255
1256 PROGRAM_CASE(REPORT_SOM_EXHAUST) {
1257 os << " onmatch " << ri->onmatch << endl;
1258 os << " offset_adjust " << ri->offset_adjust << endl;
1259 os << " ekey " << ri->ekey << endl;
1260 }
1261 PROGRAM_NEXT_INSTRUCTION
1262
1263 PROGRAM_CASE(DEDUPE_AND_REPORT) {
1264 os << " quash_som " << u32{ri->quash_som} << endl;
1265 os << " dkey " << ri->dkey << endl;
1266 os << " onmatch " << ri->onmatch << endl;
1267 os << " offset_adjust " << ri->offset_adjust << endl;
1268 os << " fail_jump " << offset + ri->fail_jump << endl;
1269 }
1270 PROGRAM_NEXT_INSTRUCTION
1271
1272 PROGRAM_CASE(FINAL_REPORT) {
1273 os << " onmatch " << ri->onmatch << endl;
1274 os << " offset_adjust " << ri->offset_adjust << endl;
1275 }
1276 PROGRAM_NEXT_INSTRUCTION
1277
1278 PROGRAM_CASE(CHECK_EXHAUSTED) {
1279 os << " ekey " << ri->ekey << endl;
1280 os << " fail_jump " << offset + ri->fail_jump << endl;
1281 }
1282 PROGRAM_NEXT_INSTRUCTION
1283
1284 PROGRAM_CASE(CHECK_MIN_LENGTH) {
1285 os << " end_adj " << ri->end_adj << endl;
1286 os << " min_length " << ri->min_length << endl;
1287 os << " fail_jump " << offset + ri->fail_jump << endl;
1288 }
1289 PROGRAM_NEXT_INSTRUCTION
1290
1291 PROGRAM_CASE(SET_STATE) {
1292 os << " index " << ri->index << endl;
1293 }
1294 PROGRAM_NEXT_INSTRUCTION
1295
1296 PROGRAM_CASE(SET_GROUPS) {
1297 os << " groups 0x" << std::hex << ri->groups << std::dec
1298 << endl;
1299 }
1300 PROGRAM_NEXT_INSTRUCTION
1301
1302 PROGRAM_CASE(SQUASH_GROUPS) {
1303 os << " groups 0x" << std::hex << ri->groups << std::dec
1304 << endl;
1305 }
1306 PROGRAM_NEXT_INSTRUCTION
1307
1308 PROGRAM_CASE(CHECK_STATE) {
1309 os << " index " << ri->index << endl;
1310 os << " fail_jump " << offset + ri->fail_jump << endl;
1311 }
1312 PROGRAM_NEXT_INSTRUCTION
1313
1314 PROGRAM_CASE(SPARSE_ITER_BEGIN) {
1315 os << " iter_offset " << ri->iter_offset << endl;
1316 os << " jump_table " << ri->jump_table << endl;
1317 dumpJumpTable(os, t, ri);
1318 os << " fail_jump " << offset + ri->fail_jump << endl;
1319 }
1320 PROGRAM_NEXT_INSTRUCTION
1321
1322 PROGRAM_CASE(SPARSE_ITER_NEXT) {
1323 os << " iter_offset " << ri->iter_offset << endl;
1324 os << " jump_table " << ri->jump_table << endl;
1325 os << " state " << ri->state << endl;
1326 os << " fail_jump " << offset + ri->fail_jump << endl;
1327 }
1328 PROGRAM_NEXT_INSTRUCTION
1329
1330 PROGRAM_CASE(SPARSE_ITER_ANY) {
1331 os << " iter_offset " << ri->iter_offset << endl;
1332 os << " fail_jump " << offset + ri->fail_jump << endl;
1333 }
1334 PROGRAM_NEXT_INSTRUCTION
1335
1336 PROGRAM_CASE(ENGINES_EOD) {
1337 os << " iter_offset " << ri->iter_offset << endl;
1338 }
1339 PROGRAM_NEXT_INSTRUCTION
1340
1341 PROGRAM_CASE(SUFFIXES_EOD) {}
1342 PROGRAM_NEXT_INSTRUCTION
1343
1344 PROGRAM_CASE(MATCHER_EOD) {}
1345 PROGRAM_NEXT_INSTRUCTION
1346
1347 PROGRAM_CASE(CHECK_LONG_LIT) {
1348 os << " lit_offset " << ri->lit_offset << endl;
1349 os << " lit_length " << ri->lit_length << endl;
1350 const char *lit = (const char *)t + ri->lit_offset;
1351 os << " literal: \""
1352 << escapeString(string(lit, ri->lit_length)) << "\"" << endl;
1353 os << " fail_jump " << offset + ri->fail_jump << endl;
1354 }
1355 PROGRAM_NEXT_INSTRUCTION
1356
1357 PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) {
1358 os << " lit_offset " << ri->lit_offset << endl;
1359 os << " lit_length " << ri->lit_length << endl;
1360 const char *lit = (const char *)t + ri->lit_offset;
1361 os << " literal: \""
1362 << escapeString(string(lit, ri->lit_length)) << "\"" << endl;
1363 os << " fail_jump " << offset + ri->fail_jump << endl;
1364 }
1365 PROGRAM_NEXT_INSTRUCTION
1366
1367 PROGRAM_CASE(CHECK_MED_LIT) {
1368 os << " lit_offset " << ri->lit_offset << endl;
1369 os << " lit_length " << ri->lit_length << endl;
1370 const char *lit = (const char *)t + ri->lit_offset;
1371 os << " literal: \""
1372 << escapeString(string(lit, ri->lit_length)) << "\"" << endl;
1373 os << " fail_jump " << offset + ri->fail_jump << endl;
1374 }
1375 PROGRAM_NEXT_INSTRUCTION
1376
1377 PROGRAM_CASE(CHECK_MED_LIT_NOCASE) {
1378 os << " lit_offset " << ri->lit_offset << endl;
1379 os << " lit_length " << ri->lit_length << endl;
1380 const char *lit = (const char *)t + ri->lit_offset;
1381 os << " literal: \""
1382 << escapeString(string(lit, ri->lit_length)) << "\"" << endl;
1383 os << " fail_jump " << offset + ri->fail_jump << endl;
1384 }
1385 PROGRAM_NEXT_INSTRUCTION
1386
1387 PROGRAM_CASE(CLEAR_WORK_DONE) {}
1388 PROGRAM_NEXT_INSTRUCTION
1389
1390 PROGRAM_CASE(MULTIPATH_LOOKAROUND) {
1391 os << " look_index " << ri->look_index << endl;
1392 os << " reach_index " << ri->reach_index << endl;
1393 os << " count " << ri->count << endl;
1394 os << " last_start " << ri->last_start << endl;
1395 os << " start_mask "
1396 << dumpStrMask(ri->start_mask, sizeof(ri->start_mask))
1397 << endl;
1398 os << " fail_jump " << offset + ri->fail_jump << endl;
1399 dumpMultipathLookaround(os, t, ri);
1400 }
1401 PROGRAM_NEXT_INSTRUCTION
1402
1403 PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_16x8) {
1404 os << " nib_mask "
1405 << dumpStrMask(ri->nib_mask, sizeof(ri->nib_mask))
1406 << endl;
1407 os << " bucket_select_mask "
1408 << dumpStrMask(ri->bucket_select_mask,
1409 sizeof(ri->bucket_select_mask))
1410 << endl;
1411 os << " data_select_mask "
1412 << dumpStrMask(ri->data_select_mask,
1413 sizeof(ri->data_select_mask))
1414 << endl;
1415 os << " hi_bits_mask 0x" << std::hex << std::setw(4)
1416 << std::setfill('0') << ri->hi_bits_mask << std::dec << endl;
1417 os << " lo_bits_mask 0x" << std::hex << std::setw(4)
1418 << std::setfill('0') << ri->lo_bits_mask << std::dec << endl;
1419 os << " neg_mask 0x" << std::hex << std::setw(4)
1420 << std::setfill('0') << ri->neg_mask << std::dec << endl;
1421 os << " base_offset " << ri->base_offset << endl;
1422 os << " last_start " << ri->last_start << endl;
1423 os << " fail_jump " << offset + ri->fail_jump << endl;
1424 dumpMultipathShufti(os, 16, ri->nib_mask, ri->nib_mask + 16,
1425 ri->bucket_select_mask,
1426 ri->data_select_mask,
1427 ri->neg_mask, ri->base_offset);
1428 }
1429 PROGRAM_NEXT_INSTRUCTION
1430
1431 PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x8) {
1432 os << " hi_mask "
1433 << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
1434 << endl;
1435 os << " lo_mask "
1436 << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
1437 << endl;
1438 os << " bucket_select_mask "
1439 << dumpStrMask(ri->bucket_select_mask,
1440 sizeof(ri->bucket_select_mask))
1441 << endl;
1442 os << " data_select_mask "
1443 << dumpStrMask(ri->data_select_mask,
1444 sizeof(ri->data_select_mask))
1445 << endl;
1446 os << " hi_bits_mask 0x" << std::hex << std::setw(8)
1447 << std::setfill('0') << ri->hi_bits_mask << std::dec << endl;
1448 os << " lo_bits_mask 0x" << std::hex << std::setw(8)
1449 << std::setfill('0') << ri->lo_bits_mask << std::dec << endl;
1450 os << " neg_mask 0x" << std::hex << std::setw(8)
1451 << std::setfill('0') << ri->neg_mask << std::dec << endl;
1452 os << " base_offset " << ri->base_offset << endl;
1453 os << " last_start " << ri->last_start << endl;
1454 os << " fail_jump " << offset + ri->fail_jump << endl;
1455 dumpMultipathShufti(os, 32, ri->lo_mask, ri->hi_mask,
1456 ri->bucket_select_mask,
1457 ri->data_select_mask,
1458 ri->neg_mask, ri->base_offset);
1459 }
1460 PROGRAM_NEXT_INSTRUCTION
1461
1462 PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x16) {
1463 os << " hi_mask "
1464 << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
1465 << endl;
1466 os << " lo_mask "
1467 << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
1468 << endl;
1469 os << " bucket_select_mask_hi "
1470 << dumpStrMask(ri->bucket_select_mask_hi,
1471 sizeof(ri->bucket_select_mask_hi))
1472 << endl;
1473 os << " bucket_select_mask_lo "
1474 << dumpStrMask(ri->bucket_select_mask_lo,
1475 sizeof(ri->bucket_select_mask_lo))
1476 << endl;
1477 os << " data_select_mask "
1478 << dumpStrMask(ri->data_select_mask,
1479 sizeof(ri->data_select_mask))
1480 << endl;
1481 os << " hi_bits_mask 0x" << std::hex << std::setw(8)
1482 << std::setfill('0') << ri->hi_bits_mask << std::dec << endl;
1483 os << " lo_bits_mask 0x" << std::hex << std::setw(8)
1484 << std::setfill('0') << ri->lo_bits_mask << std::dec << endl;
1485 os << " neg_mask 0x" << std::hex << std::setw(8)
1486 << std::setfill('0') << ri->neg_mask << std::dec << endl;
1487 os << " base_offset " << ri->base_offset << endl;
1488 os << " last_start " << ri->last_start << endl;
1489 os << " fail_jump " << offset + ri->fail_jump << endl;
1490 dumpMultipathShufti(os, 32, ri->lo_mask, ri->hi_mask,
1491 ri->lo_mask + 16, ri->hi_mask + 16,
1492 ri->bucket_select_mask_lo,
1493 ri->bucket_select_mask_hi,
1494 ri->data_select_mask,
1495 ri->neg_mask, ri->base_offset);
1496 }
1497 PROGRAM_NEXT_INSTRUCTION
1498
1499 PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_64) {
1500 os << " hi_mask "
1501 << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
1502 << endl;
1503 os << " lo_mask "
1504 << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
1505 << endl;
1506 os << " bucket_select_mask "
1507 << dumpStrMask(ri->bucket_select_mask,
1508 sizeof(ri->bucket_select_mask))
1509 << endl;
1510 os << " data_select_mask "
1511 << dumpStrMask(ri->data_select_mask,
1512 sizeof(ri->data_select_mask))
1513 << endl;
1514 os << " hi_bits_mask 0x" << std::hex << std::setw(16)
1515 << std::setfill('0') << ri->hi_bits_mask << std::dec << endl;
1516 os << " lo_bits_mask 0x" << std::hex << std::setw(16)
1517 << std::setfill('0') << ri->lo_bits_mask << std::dec << endl;
1518 os << " neg_mask 0x" << std::hex << std::setw(16)
1519 << std::setfill('0') << ri->neg_mask << std::dec << endl;
1520 os << " base_offset " << ri->base_offset << endl;
1521 os << " last_start " << ri->last_start << endl;
1522 os << " fail_jump " << offset + ri->fail_jump << endl;
1523 dumpMultipathShufti(os, 64, ri->lo_mask, ri->hi_mask,
1524 ri->bucket_select_mask,
1525 ri->data_select_mask,
1526 ri->neg_mask, ri->base_offset);
1527 }
1528 PROGRAM_NEXT_INSTRUCTION
1529
1530 PROGRAM_CASE(INCLUDED_JUMP) {
1531 os << " child_offset " << ri->child_offset << endl;
1532 os << " squash " << (u32)ri->squash << endl;
1533 }
1534 PROGRAM_NEXT_INSTRUCTION
1535
1536 PROGRAM_CASE(SET_LOGICAL) {
1537 os << " lkey " << ri->lkey << endl;
1538 os << " offset_adjust " << ri->offset_adjust << endl;
1539 }
1540 PROGRAM_NEXT_INSTRUCTION
1541
1542 PROGRAM_CASE(SET_COMBINATION) {
1543 os << " ckey " << ri->ckey << endl;
1544 }
1545 PROGRAM_NEXT_INSTRUCTION
1546
1547 PROGRAM_CASE(FLUSH_COMBINATION) {}
1548 PROGRAM_NEXT_INSTRUCTION
1549
1550 PROGRAM_CASE(SET_EXHAUST) {
1551 os << " ekey " << ri->ekey << endl;
1552 }
1553 PROGRAM_NEXT_INSTRUCTION
1554
1555 PROGRAM_CASE(LAST_FLUSH_COMBINATION) {}
1556 PROGRAM_NEXT_INSTRUCTION
1557
1558 default:
1559 os << " UNKNOWN (code " << int{code} << ")" << endl;
1560 os << " <stopping>" << endl;
1561 return;
1562 }
1563 }
1564 }
1565
1566 #undef PROGRAM_CASE
1567 #undef PROGRAM_NEXT_INSTRUCTION
1568
1569 static
dumpRoseLitPrograms(const vector<LitFragment> & fragments,const RoseEngine * t,const string & filename)1570 void dumpRoseLitPrograms(const vector<LitFragment> &fragments,
1571 const RoseEngine *t, const string &filename) {
1572 ofstream os(filename);
1573
1574 // Collect all programs referenced by a literal fragment.
1575 vector<u32> programs;
1576 for (const auto &frag : fragments) {
1577 if (frag.lit_program_offset) {
1578 programs.push_back(frag.lit_program_offset);
1579 }
1580 if (frag.delay_program_offset) {
1581 programs.push_back(frag.delay_program_offset);
1582 }
1583 }
1584 sort_and_unique(programs);
1585
1586 for (u32 prog_offset : programs) {
1587 os << "Program @ " << prog_offset << ":" << endl;
1588 const char *prog = (const char *)loadFromByteCodeOffset(t, prog_offset);
1589 dumpProgram(os, t, prog);
1590 os << endl;
1591 }
1592
1593 os.close();
1594 }
1595
1596 static
dumpRoseEodPrograms(const RoseEngine * t,const string & filename)1597 void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) {
1598 ofstream os(filename);
1599 const char *base = (const char *)t;
1600
1601 if (t->eodProgramOffset) {
1602 os << "EOD Program @ " << t->eodProgramOffset << ":" << endl;
1603 dumpProgram(os, t, base + t->eodProgramOffset);
1604 os << endl;
1605 } else {
1606 os << "<No EOD Program>" << endl;
1607 }
1608
1609 os.close();
1610 }
1611
1612 static
dumpRoseFlushCombPrograms(const RoseEngine * t,const string & filename)1613 void dumpRoseFlushCombPrograms(const RoseEngine *t, const string &filename) {
1614 ofstream os(filename);
1615 const char *base = (const char *)t;
1616
1617 if (t->flushCombProgramOffset) {
1618 os << "Flush Combination Program @ " << t->flushCombProgramOffset
1619 << ":" << endl;
1620 dumpProgram(os, t, base + t->flushCombProgramOffset);
1621 os << endl;
1622 } else {
1623 os << "<No Flush Combination Program>" << endl;
1624 }
1625
1626 os.close();
1627 }
1628
1629 static
dumpRoseLastFlushCombPrograms(const RoseEngine * t,const string & filename)1630 void dumpRoseLastFlushCombPrograms(const RoseEngine *t,
1631 const string &filename) {
1632 ofstream os(filename);
1633 const char *base = (const char *)t;
1634
1635 if (t->lastFlushCombProgramOffset) {
1636 os << "Last Flush Combination Program @ "
1637 << t->lastFlushCombProgramOffset
1638 << ":" << endl;
1639 dumpProgram(os, t, base + t->lastFlushCombProgramOffset);
1640 os << endl;
1641 } else {
1642 os << "<No Last Flush Combination Program>" << endl;
1643 }
1644
1645 os.close();
1646 }
1647
1648 static
dumpRoseReportPrograms(const RoseEngine * t,const string & filename)1649 void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) {
1650 ofstream os(filename);
1651
1652 const u32 *programs =
1653 (const u32 *)loadFromByteCodeOffset(t, t->reportProgramOffset);
1654
1655 for (u32 i = 0; i < t->reportProgramCount; i++) {
1656 os << "Report " << i << endl;
1657 os << "---------------" << endl;
1658
1659 if (programs[i]) {
1660 os << "Program @ " << programs[i] << ":" << endl;
1661 const char *prog =
1662 (const char *)loadFromByteCodeOffset(t, programs[i]);
1663 dumpProgram(os, t, prog);
1664 } else {
1665 os << "<No Program>" << endl;
1666 }
1667 }
1668
1669 os.close();
1670 }
1671
1672 static
dumpRoseAnchoredPrograms(const RoseEngine * t,const string & filename)1673 void dumpRoseAnchoredPrograms(const RoseEngine *t, const string &filename) {
1674 ofstream os(filename);
1675
1676 const u32 *programs =
1677 (const u32 *)loadFromByteCodeOffset(t, t->anchoredProgramOffset);
1678
1679 for (u32 i = 0; i < t->anchored_count; i++) {
1680 os << "Anchored entry " << i << endl;
1681 os << "---------------" << endl;
1682
1683 if (programs[i]) {
1684 os << "Program @ " << programs[i] << ":" << endl;
1685 const char *prog =
1686 (const char *)loadFromByteCodeOffset(t, programs[i]);
1687 dumpProgram(os, t, prog);
1688 } else {
1689 os << "<No Program>" << endl;
1690 }
1691 os << endl;
1692 }
1693
1694 os.close();
1695 }
1696
1697 static
dumpRoseDelayPrograms(const RoseEngine * t,const string & filename)1698 void dumpRoseDelayPrograms(const RoseEngine *t, const string &filename) {
1699 ofstream os(filename);
1700
1701 const u32 *programs =
1702 (const u32 *)loadFromByteCodeOffset(t, t->delayProgramOffset);
1703
1704 for (u32 i = 0; i < t->delay_count; i++) {
1705 os << "Delay entry " << i << endl;
1706 os << "---------------" << endl;
1707
1708 if (programs[i]) {
1709 os << "Program @ " << programs[i] << ":" << endl;
1710 const char *prog =
1711 (const char *)loadFromByteCodeOffset(t, programs[i]);
1712 dumpProgram(os, t, prog);
1713 } else {
1714 os << "<No Program>" << endl;
1715 }
1716 os << endl;
1717 }
1718
1719 os.close();
1720 }
1721
1722 static
dumpNfaNotes(ofstream & fout,const RoseEngine * t,const NFA * n)1723 void dumpNfaNotes(ofstream &fout, const RoseEngine *t, const NFA *n) {
1724 const u32 qindex = n->queueIndex;
1725
1726 if (qindex < t->outfixBeginQueue) {
1727 fout << "chained";
1728 return;
1729 }
1730
1731 if (qindex < t->outfixEndQueue) {
1732 fout << "outfix";
1733 return;
1734 }
1735
1736 const NfaInfo *nfa_info = getNfaInfoByQueue(t, qindex);
1737 const NFA *nfa = getNfaByInfo(t, nfa_info);
1738
1739 if (nfa_info->eod) {
1740 fout << "eod ";
1741 }
1742
1743 if (qindex < t->leftfixBeginQueue) {
1744 fout << "suffix";
1745 return;
1746 }
1747
1748 const LeftNfaInfo *left = getLeftInfoByQueue(t, qindex);
1749 if (left->eager) {
1750 fout << "eager ";
1751 }
1752 if (left->transient) {
1753 fout << "transient " << (u32)left->transient << " ";
1754 }
1755 if (left->infix) {
1756 fout << "infix";
1757 u32 maxQueueLen = left->maxQueueLen;
1758 if (maxQueueLen != (u32)(-1)) {
1759 fout << " maxqlen=" << maxQueueLen;
1760 }
1761 } else {
1762 fout << "prefix";
1763 }
1764 fout << " maxlag=" << left->maxLag;
1765 if (left->stopTable) {
1766 fout << " miracles";
1767 }
1768 if (left->countingMiracleOffset) {
1769 const RoseCountingMiracle *cm
1770 = (const RoseCountingMiracle *)((const char *)t
1771 + left->countingMiracleOffset);
1772 fout << " counting_miracle:" << (int)cm->count
1773 << (cm->shufti ? "s" : "v");
1774 }
1775 if (nfaSupportsZombie(nfa)) {
1776 fout << " zombie";
1777 }
1778 if (left->eod_check) {
1779 fout << " eod";
1780 }
1781 }
1782
1783 static
dumpComponentInfo(const RoseEngine * t,const string & base)1784 void dumpComponentInfo(const RoseEngine *t, const string &base) {
1785 stringstream ss;
1786 ss << base << "rose_components.txt";
1787 ofstream fout(ss.str().c_str());
1788
1789 fout << "Index Offset\tEngine \tStates S.State Bytes Notes\n";
1790
1791 for (u32 i = 0; i < t->queueCount; i++) {
1792 const NfaInfo *nfa_info = getNfaInfoByQueue(t, i);
1793 const NFA *n = getNfaByInfo(t, nfa_info);
1794
1795 fout << left << setw(6) << i << " ";
1796
1797 fout << left << ((const char *)n - (const char *)t) << "\t"; /* offset */
1798
1799 fout << left << setw(16) << describe(*n) << "\t";
1800
1801 fout << left << setw(6) << n->nPositions << " ";
1802 fout << left << setw(7) << n->streamStateSize << " ";
1803 fout << left << setw(7) << n->length << " ";
1804
1805 dumpNfaNotes(fout, t, n);
1806
1807 fout << endl;
1808 }
1809 }
1810
1811 static
dumpComponentInfoCsv(const RoseEngine * t,const string & base)1812 void dumpComponentInfoCsv(const RoseEngine *t, const string &base) {
1813 StdioFile f(base + "/rose_components.csv", "w");
1814
1815 fprintf(f, "Index, Offset,Engine Type,States,Stream State,"
1816 "Bytecode Size,Kind,Notes\n");
1817
1818 for (u32 i = 0; i < t->queueCount; i++) {
1819 const NfaInfo *nfa_info = getNfaInfoByQueue(t, i);
1820 const NFA *n = getNfaByInfo(t, nfa_info);
1821 nfa_kind kind;
1822 stringstream notes;
1823
1824 if (i < t->outfixBeginQueue) {
1825 notes << "chained;";
1826 }
1827
1828 if (nfa_info->eod) {
1829 notes << "eod;";
1830 }
1831
1832 if (i < t->outfixEndQueue) {
1833 kind = NFA_OUTFIX;
1834 } else if (i < t->leftfixBeginQueue) {
1835 kind = NFA_SUFFIX;
1836 } else {
1837 const LeftNfaInfo *left = getLeftInfoByQueue(t, i);
1838 if (left->eager) {
1839 notes << "eager;";
1840 }
1841 if (left->transient) {
1842 notes << "transient " << (u32)left->transient << ";";
1843 }
1844 if (left->infix) {
1845 kind = NFA_INFIX;
1846 u32 maxQueueLen = left->maxQueueLen;
1847 if (maxQueueLen != (u32)(-1)) {
1848 notes << "maxqlen=" << maxQueueLen << ";";
1849 }
1850 } else {
1851 kind = NFA_PREFIX;
1852 }
1853 notes << "maxlag=" << left->maxLag << ";";
1854 if (left->stopTable) {
1855 notes << "miracles;";
1856 }
1857 if (left->countingMiracleOffset) {
1858 auto cm = (const RoseCountingMiracle *)
1859 ((const char *)t + left->countingMiracleOffset);
1860 notes << "counting_miracle:" << (int)cm->count
1861 << (cm->shufti ? "s" : "v") << ";";
1862 }
1863 if (nfaSupportsZombie(n)) {
1864 notes << " zombie;";
1865 }
1866 if (left->eod_check) {
1867 notes << "left_eod;";
1868 }
1869 }
1870
1871 fprintf(f, "%u,%zd,\"%s\",%u,%u,%u,%s,%s\n", i,
1872 (const char *)n - (const char *)t, describe(*n).c_str(),
1873 n->nPositions, n->streamStateSize, n->length,
1874 to_string(kind).c_str(), notes.str().c_str());
1875 }
1876 }
1877
1878 static
dumpExhaust(const RoseEngine * t,const string & base)1879 void dumpExhaust(const RoseEngine *t, const string &base) {
1880 StdioFile f(base + "/rose_exhaust.csv", "w");
1881
1882 const NfaInfo *infos
1883 = (const NfaInfo *)((const char *)t + t->nfaInfoOffset);
1884
1885 u32 queue_count = t->activeArrayCount;
1886
1887 for (u32 i = 0; i < queue_count; ++i) {
1888 u32 ekey_offset = infos[i].ekeyListOffset;
1889
1890 fprintf(f, "%u (%u):", i, ekey_offset);
1891
1892 if (ekey_offset) {
1893 const u32 *ekeys = (const u32 *)((const char *)t + ekey_offset);
1894 while (1) {
1895 u32 e = *ekeys;
1896 ++ekeys;
1897 if (e == ~0U) {
1898 break;
1899 }
1900 fprintf(f, " %u", e);
1901 }
1902 }
1903
1904 fprintf(f, "\n");
1905 }
1906 }
1907
1908 static
dumpNfas(const RoseEngine * t,bool dump_raw,const string & base)1909 void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) {
1910 dumpExhaust(t, base);
1911
1912 for (u32 i = 0; i < t->queueCount; i++) {
1913 const NfaInfo *nfa_info = getNfaInfoByQueue(t, i);
1914 const NFA *n = getNfaByInfo(t, nfa_info);
1915
1916 stringstream ssbase;
1917 ssbase << base << "rose_nfa_" << i;
1918 nfaGenerateDumpFiles(n, ssbase.str());
1919
1920 if (dump_raw) {
1921 stringstream ssraw;
1922 ssraw << base << "rose_nfa_" << i << ".raw";
1923 StdioFile f(ssraw.str(), "w");
1924 fwrite(n, 1, n->length, f);
1925 }
1926 }
1927 }
1928
1929 static
dumpRevComponentInfo(const RoseEngine * t,const string & base)1930 void dumpRevComponentInfo(const RoseEngine *t, const string &base) {
1931 stringstream ss;
1932 ss << base << "som_rev_components.txt";
1933 ofstream fout(ss.str().c_str());
1934
1935 fout << "Index Offset\tEngine \tStates S.State Bytes\n";
1936
1937 const char *tp = (const char *)t;
1938 const u32 *rev_offsets = (const u32 *)(tp + t->somRevOffsetOffset);
1939
1940 for (u32 i = 0; i < t->somRevCount; i++) {
1941 u32 offset = rev_offsets[i];
1942 const NFA *n = (const NFA *)(tp + offset);
1943
1944 fout << left << setw(6) << i << " ";
1945
1946 fout << left << offset << "\t"; /* offset */
1947
1948 fout << left << setw(16) << describe(*n) << "\t";
1949
1950 fout << left << setw(6) << n->nPositions << " ";
1951 fout << left << setw(7) << n->streamStateSize << " ";
1952 fout << left << setw(7) << n->length;
1953 fout << endl;
1954 }
1955 }
1956
1957 static
dumpRevNfas(const RoseEngine * t,bool dump_raw,const string & base)1958 void dumpRevNfas(const RoseEngine *t, bool dump_raw, const string &base) {
1959 const char *tp = (const char *)t;
1960 const u32 *rev_offsets = (const u32 *)(tp + t->somRevOffsetOffset);
1961
1962 for (u32 i = 0; i < t->somRevCount; i++) {
1963 const NFA *n = (const NFA *)(tp + rev_offsets[i]);
1964
1965 stringstream ssbase;
1966 ssbase << base << "som_rev_nfa_" << i;
1967 nfaGenerateDumpFiles(n, ssbase.str());
1968
1969 if (dump_raw) {
1970 stringstream ssraw;
1971 ssraw << base << "som_rev_nfa_" << i << ".raw";
1972 StdioFile f(ssraw.str(), "w");
1973 fwrite(n, 1, n->length, f);
1974 }
1975 }
1976 }
1977
1978 static
dumpAnchored(const RoseEngine * t,const string & base)1979 void dumpAnchored(const RoseEngine *t, const string &base) {
1980 u32 i = 0;
1981 const anchored_matcher_info *curr
1982 = (const anchored_matcher_info *)getALiteralMatcher(t);
1983
1984 while (curr) {
1985 const NFA *n = (const NFA *)((const char *)curr + sizeof(*curr));
1986
1987 stringstream ssbase;
1988 ssbase << base << "anchored_" << i;
1989 nfaGenerateDumpFiles(n, ssbase.str());
1990
1991 curr = curr->next_offset ? (const anchored_matcher_info *)
1992 ((const char *)curr + curr->next_offset) : nullptr;
1993 i++;
1994 };
1995 }
1996
1997 static
dumpAnchoredStats(const void * atable,FILE * f)1998 void dumpAnchoredStats(const void *atable, FILE *f) {
1999 assert(atable);
2000
2001 u32 i = 0;
2002 const anchored_matcher_info *curr = (const anchored_matcher_info *)atable;
2003
2004 while (curr) {
2005 const NFA *n = (const NFA *)((const char *)curr + sizeof(*curr));
2006
2007 fprintf(f, " NFA %u: %s, %u states (%u bytes)\n", i,
2008 describe(*n).c_str(), n->nPositions, n->length);
2009
2010 curr = curr->next_offset ? (const anchored_matcher_info *)
2011 ((const char *)curr + curr->next_offset) : nullptr;
2012 i++;
2013 };
2014
2015 }
2016
2017 static
dumpLongLiteralSubtable(const RoseLongLitTable * ll_table,const RoseLongLitSubtable * ll_sub,FILE * f)2018 void dumpLongLiteralSubtable(const RoseLongLitTable *ll_table,
2019 const RoseLongLitSubtable *ll_sub, FILE *f) {
2020 if (!ll_sub->hashBits) {
2021 fprintf(f, " <no table>\n");
2022 return;
2023 }
2024
2025 const char *base = (const char *)ll_table;
2026
2027 u32 nbits = ll_sub->hashBits;
2028 u32 num_entries = 1U << nbits;
2029 const auto *tab = (const RoseLongLitHashEntry *)(base + ll_sub->hashOffset);
2030 u32 hash_occ =
2031 count_if(tab, tab + num_entries, [](const RoseLongLitHashEntry &ent) {
2032 return ent.str_offset != 0;
2033 });
2034 float hash_occ_percent = ((float)hash_occ / (float)num_entries) * 100;
2035
2036 fprintf(f, " hash table : %u bits, occupancy %u/%u (%0.1f%%)\n",
2037 nbits, hash_occ, num_entries, hash_occ_percent);
2038
2039 u32 bloom_bits = ll_sub->bloomBits;
2040 u32 bloom_size = 1U << bloom_bits;
2041 const u8 *bloom = (const u8 *)base + ll_sub->bloomOffset;
2042 u32 bloom_occ = accumulate(bloom, bloom + bloom_size / 8, 0,
2043 [](const u32 &sum, const u8 &elem) { return sum + popcount32(elem); });
2044 float bloom_occ_percent = ((float)bloom_occ / (float)(bloom_size)) * 100;
2045
2046 fprintf(f, " bloom filter : %u bits, occupancy %u/%u (%0.1f%%)\n",
2047 bloom_bits, bloom_occ, bloom_size, bloom_occ_percent);
2048 }
2049
2050 static
dumpLongLiteralTable(const RoseEngine * t,FILE * f)2051 void dumpLongLiteralTable(const RoseEngine *t, FILE *f) {
2052 if (!t->longLitTableOffset) {
2053 return;
2054 }
2055
2056 fprintf(f, "\n");
2057 fprintf(f, "Long literal table (streaming):\n");
2058
2059 const auto *ll_table =
2060 (const struct RoseLongLitTable *)loadFromByteCodeOffset(
2061 t, t->longLitTableOffset);
2062
2063 fprintf(f, " total size : %u bytes\n", ll_table->size);
2064 fprintf(f, " longest len : %u\n", ll_table->maxLen);
2065 fprintf(f, " stream state : %u bytes\n", ll_table->streamStateBytes);
2066
2067 fprintf(f, " caseful:\n");
2068 dumpLongLiteralSubtable(ll_table, &ll_table->caseful, f);
2069
2070 fprintf(f, " nocase:\n");
2071 dumpLongLiteralSubtable(ll_table, &ll_table->nocase, f);
2072 }
2073
2074 static
roseDumpText(const RoseEngine * t,FILE * f)2075 void roseDumpText(const RoseEngine *t, FILE *f) {
2076 if (!t) {
2077 fprintf(f, "<< no rose >>\n");
2078 return;
2079 }
2080
2081 const void *atable = getAnchoredMatcher(t);
2082 const HWLM *ftable = getFloatingMatcher(t);
2083 const HWLM *drtable = getDelayRebuildMatcher(t);
2084 const HWLM *etable = getEodMatcher(t);
2085 const HWLM *sbtable = getSmallBlockMatcher(t);
2086
2087 fprintf(f, "Rose:\n\n");
2088
2089 fprintf(f, "mode: : ");
2090 switch(t->mode) {
2091 case HS_MODE_BLOCK:
2092 fprintf(f, "block");
2093 break;
2094 case HS_MODE_STREAM:
2095 fprintf(f, "streaming");
2096 break;
2097 case HS_MODE_VECTORED:
2098 fprintf(f, "vectored");
2099 break;
2100 }
2101 fprintf(f, "\n");
2102
2103 fprintf(f, "properties :");
2104 if (t->canExhaust) {
2105 fprintf(f, " canExhaust");
2106 }
2107 if (t->hasSom) {
2108 fprintf(f, " hasSom");
2109 }
2110 if (t->runtimeImpl == ROSE_RUNTIME_PURE_LITERAL) {
2111 fprintf(f, " pureLiteral");
2112 }
2113 if (t->runtimeImpl == ROSE_RUNTIME_SINGLE_OUTFIX) {
2114 fprintf(f, " soleOutfix");
2115 }
2116 fprintf(f, "\n");
2117
2118 fprintf(f, "dkey count : %u\n", t->dkeyCount);
2119 fprintf(f, "som slot count : %u\n", t->somLocationCount);
2120 fprintf(f, "som width : %u bytes\n", t->somHorizon);
2121 fprintf(f, "rose count : %u\n", t->roseCount);
2122 fprintf(f, "\n");
2123
2124 fprintf(f, "total engine size : %u bytes\n", t->size);
2125 fprintf(f, " - anchored matcher : %u bytes over %u bytes\n", t->asize,
2126 t->anchoredDistance);
2127 fprintf(f, " - floating matcher : %zu bytes%s",
2128 ftable ? hwlmSize(ftable) : 0, t->noFloatingRoots ? " (cond)":"");
2129 if (t->floatingMinDistance) {
2130 fprintf(f, " from %s bytes\n",
2131 rose_off(t->floatingMinDistance).str().c_str());
2132 }
2133 if (t->floatingDistance != ROSE_BOUND_INF && ftable) {
2134 fprintf(f, " over %u bytes\n", t->floatingDistance);
2135 } else {
2136 fprintf(f, "\n");
2137 }
2138 fprintf(f, " - delay-rb matcher : %zu bytes\n",
2139 drtable ? hwlmSize(drtable) : 0);
2140 fprintf(f, " - eod-anch matcher : %zu bytes over last %u bytes\n",
2141 etable ? hwlmSize(etable) : 0, t->ematcherRegionSize);
2142 fprintf(f, " - small-blk matcher : %zu bytes over %u bytes\n",
2143 sbtable ? hwlmSize(sbtable) : 0, t->smallBlockDistance);
2144 fprintf(f, " - role state table : %zu bytes\n",
2145 t->rolesWithStateCount * sizeof(u32));
2146 fprintf(f, " - nfa info table : %zu bytes\n",
2147 t->queueCount * sizeof(NfaInfo));
2148
2149 fprintf(f, "state space required : %u bytes\n", t->stateOffsets.end);
2150 fprintf(f, " - history buffer : %u bytes\n", t->historyRequired);
2151 fprintf(f, " - exhaustion vector : %u bytes\n",
2152 t->stateOffsets.exhausted_size);
2153 fprintf(f, " - logical vector : %u bytes\n",
2154 t->stateOffsets.logicalVec_size);
2155 fprintf(f, " - combination vector: %u bytes\n",
2156 t->stateOffsets.combVec_size);
2157 fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize);
2158 fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState);
2159 fprintf(f, " - active array : %u bytes\n",
2160 t->stateOffsets.activeLeafArray_size);
2161 fprintf(f, " - active rose : %u bytes\n",
2162 t->stateOffsets.activeLeftArray_size);
2163 fprintf(f, " - anchored state : %u bytes\n", t->anchorStateSize);
2164 fprintf(f, " - nfa state : %u bytes\n",
2165 t->stateOffsets.end - t->stateOffsets.nfaStateBegin);
2166 fprintf(f, " - (trans. nfa state): %u bytes\n", t->tStateSize);
2167 fprintf(f, " - one whole bytes : %u bytes\n",
2168 t->stateOffsets.anchorState - t->stateOffsets.leftfixLagTable);
2169 fprintf(f, " - groups : %u bytes\n",
2170 t->stateOffsets.groups_size);
2171 fprintf(f, "\n");
2172
2173 fprintf(f, "initial groups : 0x%016llx\n", t->initialGroups);
2174 fprintf(f, "floating groups : 0x%016llx\n", t->floating_group_mask);
2175 fprintf(f, "handled key count : %u\n", t->handledKeyCount);
2176 fprintf(f, "\n");
2177
2178 fprintf(f, "total literal count : %u\n", t->totalNumLiterals);
2179 fprintf(f, " delayed literals : %u\n", t->delay_count);
2180
2181 fprintf(f, "\n");
2182 fprintf(f, " minWidth : %u\n", t->minWidth);
2183 fprintf(f, " minWidthExcludingBoundaries : %u\n",
2184 t->minWidthExcludingBoundaries);
2185 fprintf(f, " maxBiAnchoredWidth : %s\n",
2186 rose_off(t->maxBiAnchoredWidth).str().c_str());
2187 fprintf(f, " minFloatLitMatchOffset : %s\n",
2188 rose_off(t->floatingMinLiteralMatchOffset).str().c_str());
2189 fprintf(f, " maxFloatingDelayedMatch : %s\n",
2190 rose_off(t->maxFloatingDelayedMatch).str().c_str());
2191
2192 if (atable) {
2193 fprintf(f, "\nAnchored literal matcher stats:\n\n");
2194 dumpAnchoredStats(atable, f);
2195 }
2196
2197 dumpLongLiteralTable(t, f);
2198 }
2199
/*
 * Field-dump helpers: print a member's name followed by its value in decimal
 * and in hex. They write to a FILE * named f, which must be in scope where
 * the macro is expanded. The object argument is parenthesised so that any
 * pointer-valued expression (e.g. &obj) can be passed safely.
 */
#define DUMP_U8(o, member)                                                     \
    fprintf(f, " %-32s: %hhu/%hhx\n", #member, (o)->member, (o)->member)
#define DUMP_U32(o, member)                                                    \
    fprintf(f, " %-32s: %u/%08x\n", #member, (o)->member, (o)->member)
#define DUMP_U64(o, member)                                                    \
    fprintf(f, " %-32s: %llu/%016llx\n", #member, (o)->member, (o)->member)
2206
2207 static
roseDumpStructRaw(const RoseEngine * t,FILE * f)2208 void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
2209 fprintf(f, "struct RoseEngine {\n");
2210 DUMP_U8(t, noFloatingRoots);
2211 DUMP_U8(t, requiresEodCheck);
2212 DUMP_U8(t, hasOutfixesInSmallBlock);
2213 DUMP_U8(t, runtimeImpl);
2214 DUMP_U8(t, mpvTriggeredByLeaf);
2215 DUMP_U8(t, canExhaust);
2216 DUMP_U8(t, hasSom);
2217 DUMP_U8(t, somHorizon);
2218 DUMP_U32(t, mode);
2219 DUMP_U32(t, historyRequired);
2220 DUMP_U32(t, ekeyCount);
2221 DUMP_U32(t, lkeyCount);
2222 DUMP_U32(t, lopCount);
2223 DUMP_U32(t, ckeyCount);
2224 DUMP_U32(t, logicalTreeOffset);
2225 DUMP_U32(t, combInfoMapOffset);
2226 DUMP_U32(t, dkeyCount);
2227 DUMP_U32(t, dkeyLogSize);
2228 DUMP_U32(t, invDkeyOffset);
2229 DUMP_U32(t, somLocationCount);
2230 DUMP_U32(t, somLocationFatbitSize);
2231 DUMP_U32(t, rolesWithStateCount);
2232 DUMP_U32(t, stateSize);
2233 DUMP_U32(t, anchorStateSize);
2234 DUMP_U32(t, tStateSize);
2235 DUMP_U32(t, smallWriteOffset);
2236 DUMP_U32(t, amatcherOffset);
2237 DUMP_U32(t, ematcherOffset);
2238 DUMP_U32(t, fmatcherOffset);
2239 DUMP_U32(t, drmatcherOffset);
2240 DUMP_U32(t, sbmatcherOffset);
2241 DUMP_U32(t, longLitTableOffset);
2242 DUMP_U32(t, amatcherMinWidth);
2243 DUMP_U32(t, fmatcherMinWidth);
2244 DUMP_U32(t, eodmatcherMinWidth);
2245 DUMP_U32(t, amatcherMaxBiAnchoredWidth);
2246 DUMP_U32(t, fmatcherMaxBiAnchoredWidth);
2247 DUMP_U32(t, reportProgramOffset);
2248 DUMP_U32(t, reportProgramCount);
2249 DUMP_U32(t, delayProgramOffset);
2250 DUMP_U32(t, anchoredProgramOffset);
2251 DUMP_U32(t, activeArrayCount);
2252 DUMP_U32(t, activeLeftCount);
2253 DUMP_U32(t, queueCount);
2254 DUMP_U32(t, activeQueueArraySize);
2255 DUMP_U32(t, eagerIterOffset);
2256 DUMP_U32(t, handledKeyCount);
2257 DUMP_U32(t, handledKeyFatbitSize);
2258 DUMP_U32(t, leftOffset);
2259 DUMP_U32(t, roseCount);
2260 DUMP_U32(t, eodProgramOffset);
2261 DUMP_U32(t, flushCombProgramOffset);
2262 DUMP_U32(t, lastByteHistoryIterOffset);
2263 DUMP_U32(t, minWidth);
2264 DUMP_U32(t, minWidthExcludingBoundaries);
2265 DUMP_U32(t, maxBiAnchoredWidth);
2266 DUMP_U32(t, anchoredDistance);
2267 DUMP_U32(t, anchoredMinDistance);
2268 DUMP_U32(t, floatingDistance);
2269 DUMP_U32(t, floatingMinDistance);
2270 DUMP_U32(t, smallBlockDistance);
2271 DUMP_U32(t, floatingMinLiteralMatchOffset);
2272 DUMP_U32(t, nfaInfoOffset);
2273 DUMP_U64(t, initialGroups);
2274 DUMP_U64(t, floating_group_mask);
2275 DUMP_U32(t, size);
2276 DUMP_U32(t, delay_count);
2277 DUMP_U32(t, delay_fatbit_size);
2278 DUMP_U32(t, anchored_count);
2279 DUMP_U32(t, anchored_fatbit_size);
2280 DUMP_U32(t, maxFloatingDelayedMatch);
2281 DUMP_U32(t, delayRebuildLength);
2282 DUMP_U32(t, stateOffsets.history);
2283 DUMP_U32(t, stateOffsets.exhausted);
2284 DUMP_U32(t, stateOffsets.exhausted_size);
2285 DUMP_U32(t, stateOffsets.logicalVec);
2286 DUMP_U32(t, stateOffsets.logicalVec_size);
2287 DUMP_U32(t, stateOffsets.combVec);
2288 DUMP_U32(t, stateOffsets.combVec_size);
2289 DUMP_U32(t, stateOffsets.activeLeafArray);
2290 DUMP_U32(t, stateOffsets.activeLeafArray_size);
2291 DUMP_U32(t, stateOffsets.activeLeftArray);
2292 DUMP_U32(t, stateOffsets.activeLeftArray_size);
2293 DUMP_U32(t, stateOffsets.leftfixLagTable);
2294 DUMP_U32(t, stateOffsets.anchorState);
2295 DUMP_U32(t, stateOffsets.groups);
2296 DUMP_U32(t, stateOffsets.groups_size);
2297 DUMP_U32(t, stateOffsets.longLitState);
2298 DUMP_U32(t, stateOffsets.longLitState_size);
2299 DUMP_U32(t, stateOffsets.somLocation);
2300 DUMP_U32(t, stateOffsets.somValid);
2301 DUMP_U32(t, stateOffsets.somWritable);
2302 DUMP_U32(t, stateOffsets.somMultibit_size);
2303 DUMP_U32(t, stateOffsets.nfaStateBegin);
2304 DUMP_U32(t, stateOffsets.end);
2305 DUMP_U32(t, boundary.reportEodOffset);
2306 DUMP_U32(t, boundary.reportZeroOffset);
2307 DUMP_U32(t, boundary.reportZeroEodOffset);
2308 DUMP_U32(t, totalNumLiterals);
2309 DUMP_U32(t, asize);
2310 DUMP_U32(t, outfixBeginQueue);
2311 DUMP_U32(t, outfixEndQueue);
2312 DUMP_U32(t, leftfixBeginQueue);
2313 DUMP_U32(t, initMpvNfa);
2314 DUMP_U32(t, rosePrefixCount);
2315 DUMP_U32(t, activeLeftIterOffset);
2316 DUMP_U32(t, ematcherRegionSize);
2317 DUMP_U32(t, somRevCount);
2318 DUMP_U32(t, somRevOffsetOffset);
2319 fprintf(f, "}\n");
2320 fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine));
2321 }
2322
2323 static
roseDumpComponents(const RoseEngine * t,bool dump_raw,const string & base)2324 void roseDumpComponents(const RoseEngine *t, bool dump_raw,
2325 const string &base) {
2326 dumpComponentInfo(t, base);
2327 dumpComponentInfoCsv(t, base);
2328 dumpNfas(t, dump_raw, base);
2329 dumpAnchored(t, base);
2330 dumpRevComponentInfo(t, base);
2331 dumpRevNfas(t, dump_raw, base);
2332 }
2333
2334 static
roseDumpPrograms(const vector<LitFragment> & fragments,const RoseEngine * t,const string & base)2335 void roseDumpPrograms(const vector<LitFragment> &fragments, const RoseEngine *t,
2336 const string &base) {
2337 dumpRoseLitPrograms(fragments, t, base + "/rose_lit_programs.txt");
2338 dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt");
2339 dumpRoseFlushCombPrograms(t, base + "/rose_flush_comb_programs.txt");
2340 dumpRoseLastFlushCombPrograms(t,
2341 base + "/rose_last_flush_comb_programs.txt");
2342 dumpRoseReportPrograms(t, base + "/rose_report_programs.txt");
2343 dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt");
2344 dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt");
2345 }
2346
2347 static
roseDumpLiteralMatchers(const RoseEngine * t,const string & base)2348 void roseDumpLiteralMatchers(const RoseEngine *t, const string &base) {
2349 if (const HWLM *hwlm = getFloatingMatcher(t)) {
2350 hwlmGenerateDumpFiles(hwlm, base + "/lit_table_floating");
2351 }
2352
2353 if (const HWLM *hwlm = getDelayRebuildMatcher(t)) {
2354 hwlmGenerateDumpFiles(hwlm, base + "/lit_table_delay_rebuild");
2355 }
2356
2357 if (const HWLM *hwlm = getEodMatcher(t)) {
2358 hwlmGenerateDumpFiles(hwlm, base + "/lit_table_eod");
2359 }
2360
2361 if (const HWLM *hwlm = getSmallBlockMatcher(t)) {
2362 hwlmGenerateDumpFiles(hwlm, base + "/lit_table_small_block");
2363 }
2364 }
2365
dumpRose(const RoseBuildImpl & build,const vector<LitFragment> & fragments,const map<left_id,u32> & leftfix_queue_map,const map<suffix_id,u32> & suffix_queue_map,const RoseEngine * t)2366 void dumpRose(const RoseBuildImpl &build, const vector<LitFragment> &fragments,
2367 const map<left_id, u32> &leftfix_queue_map,
2368 const map<suffix_id, u32> &suffix_queue_map,
2369 const RoseEngine *t) {
2370 const Grey &grey = build.cc.grey;
2371
2372 if (!grey.dumpFlags) {
2373 return;
2374 }
2375
2376 StdioFile f(grey.dumpPath + "/rose.txt", "w");
2377
2378 if (!t) {
2379 fprintf(f, "<< no rose >>\n");
2380 return;
2381 }
2382
2383 // Dump Rose table info
2384 roseDumpText(t, f);
2385
2386 roseDumpComponents(t, false, grey.dumpPath);
2387 roseDumpPrograms(fragments, t, grey.dumpPath);
2388 roseDumpLiteralMatchers(t, grey.dumpPath);
2389
2390 // Graph.
2391 dumpRoseGraph(build, t, fragments, leftfix_queue_map, suffix_queue_map,
2392 "rose.dot");
2393
2394 // Literals
2395 dumpRoseLiterals(build, fragments, grey);
2396
2397 f = StdioFile(grey.dumpPath + "/rose_struct.txt", "w");
2398 roseDumpStructRaw(t, f);
2399 }
2400
2401 } // namespace ue2
2402