1 /*
2  * Copyright (c) 2015-2020, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "config.h"
30 
31 #include "rose_build_dump.h"
32 
33 #include "rose_build_impl.h"
34 #include "rose_build_matchers.h"
35 #include "rose_internal.h"
36 #include "rose_program.h"
37 #include "ue2common.h"
38 #include "hs_compile.h"
39 #include "hwlm/hwlm_build.h"
40 #include "hwlm/hwlm_dump.h"
41 #include "hwlm/hwlm_literal.h"
42 #include "nfa/castlecompile.h"
43 #include "nfa/nfa_build_util.h"
44 #include "nfa/nfa_dump_api.h"
45 #include "nfa/nfa_internal.h"
46 #include "nfagraph/ng_dump.h"
47 #include "som/slot_manager_dump.h"
48 #include "util/compile_context.h"
49 #include "util/container.h"
50 #include "util/dump_charclass.h"
51 #include "util/dump_util.h"
52 #include "util/graph_range.h"
53 #include "util/multibit.h"
54 #include "util/multibit_build.h"
55 #include "util/ue2string.h"
56 
57 #include <iomanip>
58 #include <numeric>
59 #include <ostream>
60 #include <set>
61 #include <sstream>
62 #include <string>
63 #include <vector>
64 
65 #ifndef DUMP_SUPPORT
66 #error No dump support!
67 #endif
68 
69 using namespace std;
70 
71 namespace ue2 {
72 
/**
 * \brief Return the kind of a left_id or a suffix_id.
 *
 * The possible underlying engines are probed in a fixed order (graph, DFA,
 * Haig, castle); the kind of the first one that is present is rendered with
 * to_string(). When none of them is present the string "UNKNOWN" is returned.
 */
template<class Graph>
string render_kind(const Graph &g) {
    if (const auto holder = g.graph()) {
        return to_string(holder->kind);
    }
    if (const auto raw = g.dfa()) {
        return to_string(raw->kind);
    }
    if (const auto som = g.haig()) {
        return to_string(som->kind);
    }
    if (const auto proto = g.castle()) {
        return to_string(proto->kind);
    }
    return "UNKNOWN";
}
90 
91 namespace {
92 
93 struct rose_off {
rose_offue2::__anon85d01f500111::rose_off94     explicit rose_off(u32 j) : i(j) {}
95     string str(void) const;
96     u32 i;
97 };
98 
operator <<(ostream & o,const rose_off & to)99 ostream &operator<<(ostream &o, const rose_off &to) {
100     if (to.i == ROSE_BOUND_INF) {
101         o << "inf";
102     } else {
103         o << to.i;
104     }
105     return o;
106 }
107 
str(void) const108 string rose_off::str(void) const {
109     ostringstream out;
110     out << *this;
111     return out.str();
112 }
113 
114 class RoseGraphWriter {
115 public:
RoseGraphWriter(const RoseBuildImpl & b_in,const map<u32,u32> & frag_map_in,const map<left_id,u32> & lqm_in,const map<suffix_id,u32> & sqm_in)116     RoseGraphWriter(const RoseBuildImpl &b_in, const map<u32, u32> &frag_map_in,
117                     const map<left_id, u32> &lqm_in,
118                     const map<suffix_id, u32> &sqm_in)
119         : frag_map(frag_map_in), leftfix_queue_map(lqm_in),
120           suffix_queue_map(sqm_in), build(b_in) {
121         for (const auto &m : build.ghost) {
122             ghost.insert(m.second);
123         }
124     }
125 
operator ()(ostream & os,const RoseVertex & v) const126     void operator() (ostream &os, const RoseVertex &v) const {
127         const RoseGraph &g = build.g;
128 
129         if (v == build.root) {
130             os << "[label=\"<root>\"]";
131             return;
132         }
133 
134         if (v == build.anchored_root) {
135             os << "[label=\"<^>\"]";
136             return;
137         }
138 
139         os << "[label=\"";
140         os << "index=" << g[v].index <<"\\n";
141 
142         for (u32 lit_id : g[v].literals) {
143             writeLiteral(os, lit_id);
144             os << "\\n";
145         }
146 
147         os << "min_offset=" << g[v].min_offset;
148         if (g[v].max_offset >= ROSE_BOUND_INF) {
149             os << ", max_offset=inf";
150         } else {
151             os << ", max_offset=" << g[v].max_offset;
152         }
153         os << "\\n";
154 
155         if (!g[v].reports.empty()) {
156             if (g[v].eod_accept) {
157                 os << "\\nACCEPT_EOD";
158             } else {
159                 os << "\\nACCEPT";
160             }
161             os << " (rep=" << as_string_list(g[v].reports) << ")";
162         }
163 
164         if (g[v].suffix) {
165             suffix_id suff(g[v].suffix);
166             os << "\\n" << render_kind(suff) << " (top " << g[v].suffix.top;
167             auto it = suffix_queue_map.find(suff);
168             if (it != end(suffix_queue_map)) {
169                 os << ", queue " << it->second;
170             }
171             os << ")";
172         }
173 
174         if (ghost.find(v) != ghost.end()) {
175             os << "\\nGHOST";
176         }
177 
178         if (g[v].left) {
179             left_id left(g[v].left);
180             os << "\\n" << render_kind(left) << " (queue ";
181             auto it = leftfix_queue_map.find(left);
182             if (it != end(leftfix_queue_map)) {
183                 os << it->second;
184             } else {
185                 os << "??";
186             }
187             os << ", report " << g[v].left.leftfix_report << ")";
188         }
189 
190         os << "\"";
191 
192         // Roles with a rose prefix get a colour.
193         if (g[v].left) {
194             os << " color=violetred ";
195         }
196 
197         // Our accepts get different colours.
198         if (!g[v].reports.empty()) {
199             os << " color=blue ";
200         }
201         if (g[v].suffix) {
202             os << " color=forestgreen ";
203         }
204 
205         os << "]";
206     }
207 
operator ()(ostream & os,const RoseEdge & e) const208     void operator() (ostream &os, const RoseEdge &e) const {
209         const RoseGraph &g = build.g;
210 
211         // Render the bounds on this edge.
212         u32 minBound = g[e].minBound;
213         u32 maxBound = g[e].maxBound;
214 
215         os << "[label=\"";
216         if (minBound == 0 && maxBound == ROSE_BOUND_INF) {
217             os << ".*";
218         } else if (minBound == 1 && maxBound == ROSE_BOUND_INF) {
219             os << ".+";
220         } else {
221             os << ".{" << minBound << ",";
222             if (maxBound != ROSE_BOUND_INF) {
223                 os << maxBound;
224             }
225             os << "}";
226         }
227 
228         // If we lead to an infix, display which top we're using.
229         RoseVertex v = target(e, g);
230         if (g[v].left) {
231             os << "\\nROSE TOP " << g[e].rose_top;
232         }
233 
234         switch (g[e].history) {
235         case ROSE_ROLE_HISTORY_NONE:
236             break;
237         case ROSE_ROLE_HISTORY_ANCH:
238             os << "\\nANCH history";
239             break;
240         case ROSE_ROLE_HISTORY_LAST_BYTE:
241             os << "\\nLAST_BYTE history";
242             break;
243         case ROSE_ROLE_HISTORY_INVALID:
244             os << "\\nINVALID history";
245             break;
246         }
247 
248         os << "\"]";
249     }
250 
251 private:
252     // Render the literal associated with a vertex.
writeLiteral(ostream & os,u32 id) const253     void writeLiteral(ostream &os, u32 id) const {
254         os << "lit=" << id;
255         if (contains(frag_map, id)) {
256             os << "/" << frag_map.at(id) << " ";
257         } else {
258             os << "/nofrag ";
259         }
260 
261         const auto &lit = build.literals.at(id);
262         os << '\'' << dotEscapeString(lit.s.get_string()) << '\'';
263         if (lit.s.any_nocase()) {
264             os << " (nocase)";
265         }
266         if (lit.delay) {
267             os << " +" << lit.delay;
268         }
269     }
270 
271     set<RoseVertex> ghost;
272     const map<u32, u32> &frag_map;
273     const map<left_id, u32> &leftfix_queue_map;
274     const map<suffix_id, u32> &suffix_queue_map;
275     const RoseBuildImpl &build;
276 };
277 
278 } // namespace
279 
280 static
makeFragMap(const vector<LitFragment> & fragments)281 map<u32, u32> makeFragMap(const vector<LitFragment> &fragments) {
282     map<u32, u32> fm;
283     for (const auto &f : fragments) {
284         for (u32 id : f.lit_ids) {
285             fm[id] = f.fragment_id;
286         }
287     }
288 
289     return fm;
290 }
291 
292 static
dumpRoseGraph(const RoseBuildImpl & build,const RoseEngine * t,const vector<LitFragment> & fragments,const map<left_id,u32> & leftfix_queue_map,const map<suffix_id,u32> & suffix_queue_map,const char * filename)293 void dumpRoseGraph(const RoseBuildImpl &build, const RoseEngine *t,
294                    const vector<LitFragment> &fragments,
295                    const map<left_id, u32> &leftfix_queue_map,
296                    const map<suffix_id, u32> &suffix_queue_map,
297                    const char *filename) {
298     const Grey &grey = build.cc.grey;
299 
300     /* "early" rose graphs should only be dumped if we are dumping intermediate
301      * graphs. Early graphs can be identified by the lack of a RoseEngine. */
302     u32 flag_test = t ? Grey::DUMP_IMPL : Grey::DUMP_INT_GRAPH;
303 
304     if (!(grey.dumpFlags & flag_test)) {
305         return;
306     }
307 
308     stringstream ss;
309     ss << grey.dumpPath << filename;
310 
311     DEBUG_PRINTF("dumping graph to %s\n", ss.str().c_str());
312     ofstream os(ss.str());
313 
314     auto frag_map = makeFragMap(fragments);
315     RoseGraphWriter writer(build, frag_map, leftfix_queue_map, suffix_queue_map);
316     writeGraphviz(os, build.g, writer, get(boost::vertex_index, build.g));
317 }
318 
dumpRoseGraph(const RoseBuildImpl & build,const char * filename)319 void dumpRoseGraph(const RoseBuildImpl &build, const char *filename) {
320     dumpRoseGraph(build, nullptr, {}, {}, {}, filename);
321 }
322 
323 namespace {
324 struct CompareVertexRole {
CompareVertexRoleue2::__anon85d01f500211::CompareVertexRole325     explicit CompareVertexRole(const RoseGraph &g_in) : g(g_in) {}
operator ()ue2::__anon85d01f500211::CompareVertexRole326     inline bool operator()(const RoseVertex &a, const RoseVertex &b) const {
327         return g[a].index < g[b].index;
328     }
329 private:
330     const RoseGraph &g;
331 };
332 }
333 
334 static
lit_graph_info(const RoseBuildImpl & build,const rose_literal_info & li,u32 * min_offset,bool * in_root_role)335 void lit_graph_info(const RoseBuildImpl &build, const rose_literal_info &li,
336                     u32 *min_offset, bool *in_root_role) {
337     *min_offset = ~0U;
338     *in_root_role = false;
339     for (auto v : li.vertices) {
340         *in_root_role |= build.isRootSuccessor(v);
341 
342         LIMIT_TO_AT_MOST(min_offset, build.g[v].min_offset);
343     }
344 }
345 
346 static
dumpRoseLiterals(const RoseBuildImpl & build,const vector<LitFragment> & fragments,const Grey & grey)347 void dumpRoseLiterals(const RoseBuildImpl &build,
348                       const vector<LitFragment> &fragments,
349                       const Grey &grey) {
350     const RoseGraph &g = build.g;
351     map<u32, u32> frag_map = makeFragMap(fragments);
352 
353     DEBUG_PRINTF("dumping literals\n");
354     ofstream os(grey.dumpPath + "rose_literals.txt");
355 
356     os << "ROSE LITERALS: a total of " << build.literals.size()
357        << " literals and " << num_vertices(g) << " roles." << endl
358        << endl;
359 
360     for (u32 id = 0; id < build.literals.size(); id++) {
361         const auto &lit = build.literals.at(id);
362         const ue2_literal &s = lit.s;
363         const rose_literal_info &lit_info = build.literal_info[id];
364 
365         switch (lit.table) {
366         case ROSE_ANCHORED:
367             os << "ANCHORED";
368             break;
369         case ROSE_FLOATING:
370             os << "FLOATING";
371             break;
372         case ROSE_EOD_ANCHORED:
373             os << "EOD-ANCHORED";
374             break;
375         case ROSE_ANCHORED_SMALL_BLOCK:
376             os << "SMALL-BLOCK";
377             break;
378         case ROSE_EVENT:
379             os << "EVENT";
380             break;
381         }
382 
383         os << " ID " << id;
384         if (contains(frag_map, id)) {
385             os << "/" << frag_map.at(id);
386         }
387         os << ": \"" << escapeString(s.get_string()) << "\""
388            << " (len " << s.length() << ",";
389         if (s.any_nocase()) {
390             os << " nocase,";
391         }
392         if (lit_info.requires_benefits) {
393             os << " benefits,";
394         }
395 
396         if (lit.delay) {
397             os << " delayed "<< lit.delay << ",";
398         }
399 
400         os << " groups 0x" << hex << setw(16) << setfill('0')
401            << lit_info.group_mask << dec << ",";
402 
403         if (lit_info.squash_group) {
404             os << " squashes group,";
405         }
406 
407         u32 min_offset;
408         bool in_root_role;
409         lit_graph_info(build, lit_info, &min_offset, &in_root_role);
410         os << " min offset " << min_offset;
411         if (in_root_role) {
412             os << " root literal";
413         }
414 
415         os << ") roles=" << lit_info.vertices.size() << endl;
416 
417         if (!lit_info.delayed_ids.empty()) {
418             os << "  Children:";
419             for (u32 d_id : lit_info.delayed_ids) {
420                 os << " " << d_id;
421             }
422             os << endl;
423         }
424 
425         // Temporary vector, so that we can sort the output by role.
426         vector<RoseVertex> verts(lit_info.vertices.begin(),
427                                  lit_info.vertices.end());
428         sort(verts.begin(), verts.end(), CompareVertexRole(g));
429 
430         for (RoseVertex v : verts) {
431             // role info
432             os << "  Index " << g[v].index << ": groups=0x" << hex << setw(16)
433                << setfill('0') << g[v].groups << dec;
434 
435             if (g[v].reports.empty()) {
436                 os << ", report=NONE";
437             } else {
438                 os << ", report={" << as_string_list(g[v].reports) << "}";
439             }
440 
441             os << ", min_offset=" << g[v].min_offset;
442             os << ", max_offset=" << g[v].max_offset << endl;
443             // pred info
444             for (const auto &ie : in_edges_range(v, g)) {
445                 const auto &u = source(ie, g);
446                 os << "    Predecessor index=";
447                 if (u == build.root) {
448                     os << "ROOT";
449                 } else if (u == build.anchored_root) {
450                     os << "ANCHORED_ROOT";
451                 } else {
452                     os << g[u].index;
453                 }
454                 os << ": bounds [" << g[ie].minBound << ", ";
455                 if (g[ie].maxBound == ROSE_BOUND_INF) {
456                     os << "inf";
457                 } else {
458                     os << g[ie].maxBound;
459                 }
460                 os << "]" << endl;
461             }
462         }
463     }
464 
465     os.close();
466 }
467 
/**
 * \brief Render the bytes in [i, end) as lower-case hex, two digits per byte
 * with no separators.
 */
template<class Iter>
static
string toHex(Iter i, const Iter &end) {
    ostringstream oss;
    // hex and the fill character persist on the stream; only the field width
    // has to be re-applied for every value.
    oss << hex << setfill('0');
    while (i != end) {
        const u8 byte = *i;
        oss << setw(2) << static_cast<unsigned>(byte);
        ++i;
    }
    return oss.str();
}
478 
/**
 * \brief Return true if \p c is one of the characters that toRegex() escapes
 * with a backslash: # $ ( ) * + . / ? [ \ ] ^ { | }
 */
static
bool isMetaChar(char c) {
    // Deliberately an array of characters rather than a string literal, so
    // that no terminating NUL is part of the set.
    static const char meta_chars[] = {
        '#', '$', '(', ')', '*', '+', '.', '/',
        '?', '[', '\\', ']', '^', '{', '|', '}',
    };
    for (const char meta : meta_chars) {
        if (c == meta) {
            return true;
        }
    }
    return false;
}
503 
504 static
toRegex(const string & lit)505 string toRegex(const string &lit) {
506     ostringstream os;
507     for (char c : lit) {
508         if (0x20 <= c && c <= 0x7e) {
509             if (isMetaChar(c)) {
510                 os << "\\" << c;
511             } else {
512                 os << c;
513             }
514         } else if (c == '\n') {
515             os << "\\n";
516         } else if (c == '\r') {
517             os << "\\r";
518         } else if (c == '\t') {
519             os << "\\t";
520         } else {
521             os << "\\x" << hex << setw(2) << setfill('0')
522                << (unsigned)(c & 0xff) << dec;
523         }
524     }
525     return os.str();
526 }
527 
dumpMatcherLiterals(const vector<hwlmLiteral> & lits,const string & name,const Grey & grey)528 void dumpMatcherLiterals(const vector<hwlmLiteral> &lits, const string &name,
529                          const Grey &grey) {
530     if (!grey.dumpFlags) {
531         return;
532     }
533 
534     ofstream of(grey.dumpPath + "rose_" + name + "_test_literals.txt");
535 
536     // Unique regex index, as literals may share an ID.
537     u32 i = 0;
538 
539     for (const hwlmLiteral &lit : lits) {
540         // First, detail in a comment.
541         of << "# id=" << lit.id;
542         if (!lit.msk.empty()) {
543             of << " msk=0x" << toHex(lit.msk.begin(), lit.msk.end());
544             of << " cmp=0x" << toHex(lit.cmp.begin(), lit.cmp.end());
545         }
546         of << " groups=0x" << hex << setfill('0') << lit.groups << dec;
547         if (lit.noruns) {
548             of << " noruns";
549         }
550         of << endl;
551 
552         // Second, literal rendered as a regex.
553         of << i << ":/" << toRegex(lit.s) << (lit.nocase ? "/i" : "/");
554 
555         of << endl;
556 
557         i++;
558     }
559 
560     of.close();
561 }
562 
563 static
loadFromByteCodeOffset(const RoseEngine * t,u32 offset)564 const void *loadFromByteCodeOffset(const RoseEngine *t, u32 offset) {
565     if (!offset) {
566         return nullptr;
567     }
568 
569     const char *lt = (const char *)t + offset;
570     return lt;
571 }
572 
573 static
getAnchoredMatcher(const RoseEngine * t)574 const void *getAnchoredMatcher(const RoseEngine *t) {
575     return loadFromByteCodeOffset(t, t->amatcherOffset);
576 }
577 
578 static
getFloatingMatcher(const RoseEngine * t)579 const HWLM *getFloatingMatcher(const RoseEngine *t) {
580     return (const HWLM *)loadFromByteCodeOffset(t, t->fmatcherOffset);
581 }
582 
583 static
getDelayRebuildMatcher(const RoseEngine * t)584 const HWLM *getDelayRebuildMatcher(const RoseEngine *t) {
585     return (const HWLM *)loadFromByteCodeOffset(t, t->drmatcherOffset);
586 }
587 
588 static
getEodMatcher(const RoseEngine * t)589 const HWLM *getEodMatcher(const RoseEngine *t) {
590     return (const HWLM *)loadFromByteCodeOffset(t, t->ematcherOffset);
591 }
592 
593 static
getSmallBlockMatcher(const RoseEngine * t)594 const HWLM *getSmallBlockMatcher(const RoseEngine *t) {
595     return (const HWLM *)loadFromByteCodeOffset(t, t->sbmatcherOffset);
596 }
597 
598 static
bitvectorToReach(const u8 * reach)599 CharReach bitvectorToReach(const u8 *reach) {
600     CharReach cr;
601 
602     for (size_t i = 0; i < N_CHARS; i++) {
603         if (reach[i / 8] & (1U << (i % 8))) {
604             cr.set(i);
605         }
606     }
607     return cr;
608 }
609 
610 static
multiBitvectorToReach(const u8 * reach,u8 path_mask)611 CharReach multiBitvectorToReach(const u8 *reach, u8 path_mask) {
612     CharReach cr;
613     for (size_t i = 0; i < N_CHARS; i++) {
614         if (reach[i] & path_mask) {
615             cr.set(i);
616         }
617     }
618     return cr;
619 }
620 
621 static
dumpLookaround(ofstream & os,const RoseEngine * t,const ROSE_STRUCT_CHECK_LOOKAROUND * ri)622 void dumpLookaround(ofstream &os, const RoseEngine *t,
623                     const ROSE_STRUCT_CHECK_LOOKAROUND *ri) {
624     assert(ri);
625 
626     const u8 *base = (const u8 *)t;
627 
628     const s8 *look = (const s8 *)base + ri->look_index;
629     const s8 *look_end = look + ri->count;
630     const u8 *reach = base + ri->reach_index;
631 
632     os << "    contents:" << endl;
633 
634     for (; look < look_end; look++, reach += REACH_BITVECTOR_LEN) {
635         os << "      " << std::setw(4) << std::setfill(' ') << int{*look}
636            << ": ";
637         describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT);
638         os << endl;
639     }
640 }
641 
642 static
dumpMultipathLookaround(ofstream & os,const RoseEngine * t,const ROSE_STRUCT_MULTIPATH_LOOKAROUND * ri)643 void dumpMultipathLookaround(ofstream &os, const RoseEngine *t,
644                              const ROSE_STRUCT_MULTIPATH_LOOKAROUND *ri) {
645     assert(ri);
646 
647     const u8 *base = (const u8 *)t;
648 
649     const s8 *look_begin = (const s8 *)base + ri->look_index;
650     const s8 *look_end = look_begin + ri->count;
651     const u8 *reach_begin = base + ri->reach_index;
652 
653     os << "    contents:" << endl;
654 
655     u32 path_mask = ri->start_mask[0];
656     while (path_mask) {
657         u32 path = findAndClearLSB_32(&path_mask);
658         os << "    Path #" << path << ":" << endl;
659         os << "      ";
660 
661         const s8 *look = look_begin;
662         const u8 *reach = reach_begin;
663         for (; look < look_end; look++, reach += MULTI_REACH_BITVECTOR_LEN) {
664             CharReach cr = multiBitvectorToReach(reach, 1U << path);
665             if (cr.any() && !cr.all()) {
666                 os << "<" << int(*look) << ": ";
667                 describeClass(os, cr, 1000, CC_OUT_TEXT);
668                 os << "> ";
669             }
670         }
671         os << endl;
672     }
673 }
674 
675 static
sparseIterValues(const mmbit_sparse_iter * it,u32 num_bits)676 vector<u32> sparseIterValues(const mmbit_sparse_iter *it, u32 num_bits) {
677     vector<u32> keys;
678 
679     if (num_bits == 0) {
680         return keys;
681     }
682 
683     // Populate a multibit structure with all-ones. Note that the multibit
684     // runtime assumes that it is always safe to read 8 bytes, so we must
685     // over-allocate for smaller sizes.
686     const size_t num_bytes = mmbit_size(num_bits);
687     vector<u8> bits(max(size_t{8}, num_bytes), u8{0xff}); // All bits on.
688     const u8 *b = bits.data();
689     if (num_bytes < 8) {
690         b += 8 - num_bytes;
691     }
692 
693     vector<mmbit_sparse_state> state(MAX_SPARSE_ITER_STATES);
694     mmbit_sparse_state *s = state.data();
695 
696     u32 idx = 0;
697     u32 i = mmbit_sparse_iter_begin(b, num_bits, &idx, it, s);
698     while (i != MMB_INVALID) {
699         keys.push_back(i);
700         i = mmbit_sparse_iter_next(b, num_bits, i, &idx, it, s);
701     }
702 
703     return keys;
704 }
705 
706 static
dumpJumpTable(ofstream & os,const RoseEngine * t,const ROSE_STRUCT_SPARSE_ITER_BEGIN * ri)707 void dumpJumpTable(ofstream &os, const RoseEngine *t,
708                    const ROSE_STRUCT_SPARSE_ITER_BEGIN *ri) {
709     auto *it =
710         (const mmbit_sparse_iter *)loadFromByteCodeOffset(t, ri->iter_offset);
711     auto *jumps = (const u32 *)loadFromByteCodeOffset(t, ri->jump_table);
712 
713     for (const auto &key : sparseIterValues(it, t->rolesWithStateCount)) {
714         os << "      " << std::setw(4) << std::setfill(' ') << key << " : +"
715            << *jumps << endl;
716         ++jumps;
717     }
718 }
719 
720 static
dumpSomOperation(ofstream & os,const som_operation & op)721 void dumpSomOperation(ofstream &os, const som_operation &op) {
722     os << "    som (type=" << u32{op.type} << ", onmatch=" << op.onmatch;
723     switch (op.type) {
724     case SOM_EXTERNAL_CALLBACK_REV_NFA:
725     case SOM_INTERNAL_LOC_SET_REV_NFA:
726     case SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET:
727     case SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE:
728         os << ", revNfaIndex=" << op.aux.revNfaIndex;
729         break;
730     default:
731         os << ", somDistance=" << op.aux.somDistance;
732         break;
733     }
734     os << ")" << endl;
735 }
736 
/**
 * \brief Render \p len bytes of \p mask as two-digit lower-case hex values,
 * each followed by a single space.
 */
static
string dumpStrMask(const u8 *mask, size_t len) {
    ostringstream oss;
    // hex and the fill character persist on the stream; only the width needs
    // re-applying per value.
    oss << std::hex << std::setfill('0');
    const u8 *const mask_end = mask + len;
    for (const u8 *p = mask; p != mask_end; ++p) {
        oss << std::setw(2) << u32{*p} << " ";
    }
    return oss.str();
}
746 
747 static
shufti2cr(const u8 * lo,const u8 * hi,u8 bucket_mask)748 CharReach shufti2cr(const u8 *lo, const u8 *hi, u8 bucket_mask) {
749     CharReach cr;
750     for (u32 i = 0; i < N_CHARS; i++) {
751         if(lo[i & 0xf] & hi[i >> 4] & bucket_mask) {
752             cr.set(i);
753         }
754     }
755     return cr;
756 }
757 
758 static
dumpLookaroundShufti(ofstream & os,u32 len,const u8 * lo,const u8 * hi,const u8 * bucket_mask,u64a neg_mask,s32 offset)759 void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
760                           const u8 *bucket_mask, u64a neg_mask, s32 offset) {
761     assert(len == 16 || len == 32 || len == 64);
762     os << "    contents:" << endl;
763     for (u32 idx = 0; idx < len; idx++) {
764         CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
765         if (neg_mask & (1ULL << idx)) {
766             cr.flip();
767         }
768 
769         if (cr.any() && !cr.all()) {
770             os << "      " << std::setw(4) << std::setfill(' ')
771                << int(offset + idx) << ": ";
772             describeClass(os, cr, 1000, CC_OUT_TEXT);
773             os << endl;
774         }
775     }
776 }
777 
778 static
dumpLookaroundShufti(ofstream & os,u32 len,const u8 * lo,const u8 * hi,const u8 * lo_2,const u8 * hi_2,const u8 * bucket_mask,const u8 * bucket_mask_2,u64a neg_mask,s32 offset)779 void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
780                           const u8 *lo_2, const u8 *hi_2, const u8 *bucket_mask,
781                           const u8 *bucket_mask_2, u64a neg_mask, s32 offset) {
782     assert(len == 16 || len == 32 || len == 64);
783     os << "    contents:" << endl;
784     for (u32 idx = 0; idx < len; idx++) {
785         CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
786         cr |= shufti2cr(lo_2, hi_2, bucket_mask_2[idx]);
787         if (neg_mask & (1ULL << idx)) {
788             cr.flip();
789         }
790 
791         if (cr.any() && !cr.all()) {
792             os << "      " << std::setw(4) << std::setfill(' ')
793                << int(offset + idx) << ": ";
794             describeClass(os, cr, 1000, CC_OUT_TEXT);
795             os << endl;
796         }
797     }
798 }
799 
800 static
dumpMultipathShufti(ofstream & os,u32 len,const u8 * lo,const u8 * hi,const u8 * bucket_mask,const u8 * data_offset,u64a neg_mask,s32 base_offset)801 void dumpMultipathShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
802                          const u8 *bucket_mask, const u8 *data_offset,
803                          u64a neg_mask, s32 base_offset) {
804     assert(len == 16 || len == 32 || len == 64);
805     os << "    contents:" << endl;
806     u32 path = 0;
807     for (u32 idx = 0; idx < len; idx++) {
808         CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
809 
810         if (neg_mask & (1ULL << idx)) {
811             cr.flip();
812         }
813 
814         if (cr.any() && !cr.all()) {
815             if (idx == 0 || data_offset[idx - 1] > data_offset[idx]) {
816                 path++;
817                 if (idx) {
818                     os << endl;
819                 }
820                 os << "    Path #" << path << ":" << endl;
821                 os << "      ";
822             }
823 
824             os << "<" << int(base_offset + data_offset[idx]) << ": ";
825             describeClass(os, cr, 1000, CC_OUT_TEXT);
826             os << "> ";
827         }
828     }
829     os << endl;
830 }
831 
832 static
dumpMultipathShufti(ofstream & os,u32 len,const u8 * lo,const u8 * hi,const u8 * lo_2,const u8 * hi_2,const u8 * bucket_mask,const u8 * bucket_mask_2,const u8 * data_offset,u32 neg_mask,s32 base_offset)833 void dumpMultipathShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
834                          const u8 *lo_2, const u8 *hi_2, const u8 *bucket_mask,
835                          const u8 *bucket_mask_2, const u8 *data_offset,
836                          u32 neg_mask, s32 base_offset) {
837     assert(len == 16 || len == 32 || len == 64);
838     os << "    contents:";
839     u32 path = 0;
840     for (u32 idx = 0; idx < len; idx++) {
841         CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
842         cr |= shufti2cr(lo_2, hi_2, bucket_mask_2[idx]);
843 
844         if (neg_mask & (1ULL << idx)) {
845             cr.flip();
846         }
847 
848         if (cr.any() && !cr.all()) {
849             if (idx == 0 || data_offset[idx - 1] > data_offset[idx]) {
850                 path++;
851                 os << endl;
852                 os << "    Path #" << path << ":" << endl;
853                 os << "      ";
854             }
855 
856             os << "<" << int(base_offset + data_offset[idx]) << ": ";
857             describeClass(os, cr, 1000, CC_OUT_TEXT);
858             os << "> ";
859         }
860     }
861     os << endl;
862 }
863 
/**
 * \brief Open the switch case for Rose instruction \p name in the program
 * dumper.
 *
 * Expands to the `case ROSE_INSTR_<name>:` label and an opening brace, prints
 * the instruction's distance from pc_base (zero-filled to width four) followed
 * by the instruction name, and declares `ri` as a pointer to the
 * ROSE_STRUCT_<name> located at pc. The names `os`, `pc` and `pc_base` must be
 * in scope where the macro is expanded. The brace is deliberately left open;
 * PROGRAM_NEXT_INSTRUCTION closes it.
 */
#define PROGRAM_CASE(name)                                                     \
    case ROSE_INSTR_##name: {                                                  \
        os << "  " << std::setw(4) << std::setfill('0') << (pc - pc_base)      \
           << ": " #name "\n";                                                 \
        const auto *ri = (const struct ROSE_STRUCT_##name *)pc;

/**
 * \brief Close a case opened with PROGRAM_CASE.
 *
 * Advances pc by ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN) (presumably the
 * instruction size rounded up to the minimum instruction alignment), breaks
 * out of the switch and closes the brace that PROGRAM_CASE opened.
 */
#define PROGRAM_NEXT_INSTRUCTION                                               \
    pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN);                        \
    break;                                                                     \
    }
874 
875 
876 static
dumpProgram(ofstream & os,const RoseEngine * t,const char * pc)877 void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
878     const char *pc_base = pc;
879     for (;;) {
880         u8 code = *(const u8 *)pc;
881         assert(code <= LAST_ROSE_INSTRUCTION);
882         const size_t offset = pc - pc_base;
883         switch (code) {
884             PROGRAM_CASE(END) { return; }
885             PROGRAM_NEXT_INSTRUCTION
886 
887             PROGRAM_CASE(ANCHORED_DELAY) {
888                 os << "    groups 0x" << std::hex << ri->groups << std::dec
889                    << endl;
890                 os << "    anch_id " << ri->anch_id << "\n";
891                 os << "    done_jump " << offset + ri->done_jump << endl;
892             }
893             PROGRAM_NEXT_INSTRUCTION
894 
895             PROGRAM_CASE(CHECK_LIT_EARLY) {
896                 os << "    min_offset " << ri->min_offset << endl;
897                 os << "    fail_jump " << offset + ri->fail_jump << endl;
898             }
899             PROGRAM_NEXT_INSTRUCTION
900 
901             PROGRAM_CASE(CHECK_GROUPS) {
902                 os << "    groups 0x" << std::hex << ri->groups << std::dec
903                    << endl;
904             }
905             PROGRAM_NEXT_INSTRUCTION
906 
907             PROGRAM_CASE(CHECK_ONLY_EOD) {
908                 os << "    fail_jump " << offset + ri->fail_jump << endl;
909             }
910             PROGRAM_NEXT_INSTRUCTION
911 
912             PROGRAM_CASE(CHECK_BOUNDS) {
913                 os << "    min_bound " << ri->min_bound << endl;
914                 os << "    max_bound " << ri->max_bound << endl;
915                 os << "    fail_jump " << offset + ri->fail_jump << endl;
916             }
917             PROGRAM_NEXT_INSTRUCTION
918 
919             PROGRAM_CASE(CHECK_NOT_HANDLED) {
920                 os << "    key " << ri->key << endl;
921                 os << "    fail_jump " << offset + ri->fail_jump << endl;
922             }
923             PROGRAM_NEXT_INSTRUCTION
924 
925             PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) {
926                 os << "    offset " << int{ri->offset} << endl;
927                 os << "    reach_index " << ri->reach_index << endl;
928                 os << "    fail_jump " << offset + ri->fail_jump << endl;
929                 const u8 *reach = (const u8 *)t + ri->reach_index;
930                 os << "    contents ";
931                 describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT);
932                 os << endl;
933             }
934             PROGRAM_NEXT_INSTRUCTION
935 
936             PROGRAM_CASE(CHECK_LOOKAROUND) {
937                 os << "    look_index " << ri->look_index << endl;
938                 os << "    reach_index " << ri->reach_index << endl;
939                 os << "    count " << ri->count << endl;
940                 os << "    fail_jump " << offset + ri->fail_jump << endl;
941                 dumpLookaround(os, t, ri);
942             }
943             PROGRAM_NEXT_INSTRUCTION
944 
945             PROGRAM_CASE(CHECK_MASK) {
946                 os << "    and_mask 0x" << std::hex << std::setw(16)
947                    << std::setfill('0') << ri->and_mask << std::dec << endl;
948                 os << "    cmp_mask 0x" << std::hex << std::setw(16)
949                    << std::setfill('0') << ri->cmp_mask << std::dec << endl;
950                 os << "    neg_mask 0x" << std::hex << std::setw(16)
951                    << std::setfill('0') << ri->neg_mask << std::dec << endl;
952                 os << "    offset " << ri->offset << endl;
953                 os << "    fail_jump " << offset + ri->fail_jump << endl;
954             }
955             PROGRAM_NEXT_INSTRUCTION
956 
957             PROGRAM_CASE(CHECK_MASK_32) {
958                 os << "    and_mask "
959                    << dumpStrMask(ri->and_mask, sizeof(ri->and_mask))
960                    << endl;
961                 os << "    cmp_mask "
962                    << dumpStrMask(ri->cmp_mask, sizeof(ri->cmp_mask))
963                    << endl;
964                 os << "    neg_mask 0x" << std::hex << std::setw(8)
965                    << std::setfill('0') << ri->neg_mask << std::dec << endl;
966                 os << "    offset " << ri->offset << endl;
967                 os << "    fail_jump " << offset + ri->fail_jump << endl;
968             }
969             PROGRAM_NEXT_INSTRUCTION
970 
971             PROGRAM_CASE(CHECK_MASK_64) {
972                 os << "    and_mask "
973                    << dumpStrMask(ri->and_mask, sizeof(ri->and_mask))
974                    << endl;
975                 os << "    cmp_mask "
976                    << dumpStrMask(ri->cmp_mask, sizeof(ri->cmp_mask))
977                    << endl;
978                 os << "    neg_mask 0x" << std::hex << std::setw(8)
979                    << std::setfill('0') << ri->neg_mask << std::dec << endl;
980                 os << "    offset " << ri->offset << endl;
981                 os << "    fail_jump " << offset + ri->fail_jump << endl;
982             }
983             PROGRAM_NEXT_INSTRUCTION
984 
985             PROGRAM_CASE(CHECK_BYTE) {
986                 os << "    and_mask 0x" << std::hex << std::setw(2)
987                    << std::setfill('0') << u32{ri->and_mask} << std::dec
988                    << endl;
989                 os << "    cmp_mask 0x" << std::hex << std::setw(2)
990                    << std::setfill('0') << u32{ri->cmp_mask} << std::dec
991                    << endl;
992                 os << "    negation " << u32{ri->negation} << endl;
993                 os << "    offset " << ri->offset << endl;
994                 os << "    fail_jump " << offset + ri->fail_jump << endl;
995             }
996             PROGRAM_NEXT_INSTRUCTION
997 
998             PROGRAM_CASE(CHECK_SHUFTI_16x8) {
999                 os << "    nib_mask "
1000                    << dumpStrMask(ri->nib_mask, sizeof(ri->nib_mask))
1001                    << endl;
1002                 os << "    bucket_select_mask "
1003                    << dumpStrMask(ri->bucket_select_mask,
1004                                   sizeof(ri->bucket_select_mask))
1005                    << endl;
1006                 os << "    neg_mask 0x" << std::hex << std::setw(8)
1007                    << std::setfill('0') << ri->neg_mask << std::dec << endl;
1008                 os << "    offset " << ri->offset << endl;
1009                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1010                 dumpLookaroundShufti(os, 16, ri->nib_mask, ri->nib_mask + 16,
1011                                      ri->bucket_select_mask, ri->neg_mask,
1012                                      ri->offset);
1013             }
1014             PROGRAM_NEXT_INSTRUCTION
1015 
1016             PROGRAM_CASE(CHECK_SHUFTI_32x8) {
1017                 os << "    hi_mask "
1018                    << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
1019                    << endl;
1020                 os << "    lo_mask "
1021                    << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
1022                    << endl;
1023                 os << "    bucket_select_mask "
1024                    << dumpStrMask(ri->bucket_select_mask,
1025                                   sizeof(ri->bucket_select_mask))
1026                    << endl;
1027                 os << "    neg_mask 0x" << std::hex << std::setw(8)
1028                    << std::setfill('0') << ri->neg_mask << std::dec << endl;
1029                 os << "    offset " << ri->offset << endl;
1030                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1031                 dumpLookaroundShufti(os, 32, ri->lo_mask, ri->hi_mask,
1032                                      ri->bucket_select_mask, ri->neg_mask,
1033                                      ri->offset);
1034             }
1035             PROGRAM_NEXT_INSTRUCTION
1036 
1037             PROGRAM_CASE(CHECK_SHUFTI_16x16) {
1038                 os << "    hi_mask "
1039                    << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
1040                    << endl;
1041                 os << "    lo_mask "
1042                    << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
1043                    << endl;
1044                 os << "    bucket_select_mask "
1045                    << dumpStrMask(ri->bucket_select_mask,
1046                                   sizeof(ri->bucket_select_mask))
1047                    << endl;
1048                 os << "    neg_mask 0x" << std::hex << std::setw(8)
1049                    << std::setfill('0') << ri->neg_mask << std::dec << endl;
1050                 os << "    offset " << ri->offset << endl;
1051                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1052                 dumpLookaroundShufti(os, 16, ri->lo_mask, ri->hi_mask,
1053                                      ri->lo_mask + 16, ri->hi_mask + 16,
1054                                      ri->bucket_select_mask,
1055                                      ri->bucket_select_mask + 16,
1056                                      ri->neg_mask, ri->offset);
1057             }
1058             PROGRAM_NEXT_INSTRUCTION
1059 
1060             PROGRAM_CASE(CHECK_SHUFTI_32x16) {
1061                 os << "    hi_mask "
1062                    << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
1063                    << endl;
1064                 os << "    lo_mask "
1065                    << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
1066                    << endl;
1067                 os << "    bucket_select_mask_hi "
1068                    << dumpStrMask(ri->bucket_select_mask_hi,
1069                                   sizeof(ri->bucket_select_mask_hi))
1070                    << endl;
1071                 os << "    bucket_select_mask_lo "
1072                    << dumpStrMask(ri->bucket_select_mask_lo,
1073                                   sizeof(ri->bucket_select_mask_lo))
1074                    << endl;
1075                 os << "    neg_mask 0x" << std::hex << std::setw(8)
1076                    << std::setfill('0') << ri->neg_mask << std::dec << endl;
1077                 os << "    offset " << ri->offset << endl;
1078                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1079                 dumpLookaroundShufti(os, 32, ri->lo_mask, ri->hi_mask,
1080                                      ri->lo_mask + 16, ri->hi_mask + 16,
1081                                      ri->bucket_select_mask_lo,
1082                                      ri->bucket_select_mask_hi,
1083                                      ri->neg_mask, ri->offset);
1084             }
1085             PROGRAM_NEXT_INSTRUCTION
1086 
1087             PROGRAM_CASE(CHECK_SHUFTI_64x8) {
1088                 os << "    hi_mask "
1089                    << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
1090                    << endl;
1091                 os << "    lo_mask "
1092                    << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
1093                    << endl;
1094                 os << "    bucket_select_mask "
1095                    << dumpStrMask(ri->bucket_select_mask,
1096                                   sizeof(ri->bucket_select_mask))
1097                    << endl;
1098                 os << "    neg_mask 0x" << std::hex << std::setw(8)
1099                    << std::setfill('0') << ri->neg_mask << std::dec << endl;
1100                 os << "    offset " << ri->offset << endl;
1101                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1102                 dumpLookaroundShufti(os, 64, ri->lo_mask, ri->hi_mask,
1103                                      ri->bucket_select_mask, ri->neg_mask,
1104                                      ri->offset);
1105             }
1106             PROGRAM_NEXT_INSTRUCTION
1107 
1108             PROGRAM_CASE(CHECK_SHUFTI_64x16) {
1109                 os << "    hi_mask_1 "
1110                    << dumpStrMask(ri->hi_mask_1, sizeof(ri->hi_mask_1))
1111                    << endl;
1112                 os << "    hi_mask_2 "
1113                    << dumpStrMask(ri->hi_mask_2, sizeof(ri->hi_mask_2))
1114                    << endl;
1115                 os << "    lo_mask_1 "
1116                    << dumpStrMask(ri->lo_mask_1, sizeof(ri->lo_mask_1))
1117                    << endl;
1118                 os << "    lo_mask_2 "
1119                    << dumpStrMask(ri->lo_mask_2, sizeof(ri->lo_mask_2))
1120                    << endl;
1121                 os << "    bucket_select_mask_hi "
1122                    << dumpStrMask(ri->bucket_select_mask_hi,
1123                                   sizeof(ri->bucket_select_mask_hi))
1124                    << endl;
1125                 os << "    bucket_select_mask_lo "
1126                    << dumpStrMask(ri->bucket_select_mask_lo,
1127                                   sizeof(ri->bucket_select_mask_lo))
1128                    << endl;
1129                 os << "    neg_mask 0x" << std::hex << std::setw(8)
1130                    << std::setfill('0') << ri->neg_mask << std::dec << endl;
1131                 os << "    offset " << ri->offset << endl;
1132                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1133                 dumpLookaroundShufti(os, 64, ri->lo_mask_1, ri->hi_mask_1,
1134                                      ri->lo_mask_2, ri->hi_mask_2,
1135                                      ri->bucket_select_mask_lo,
1136                                      ri->bucket_select_mask_hi,
1137                                      ri->neg_mask, ri->offset);
1138             }
1139             PROGRAM_NEXT_INSTRUCTION
1140 
1141             PROGRAM_CASE(CHECK_INFIX) {
1142                 os << "    queue " << ri->queue << endl;
1143                 os << "    lag " << ri->lag << endl;
1144                 os << "    report " << ri->report << endl;
1145                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1146             }
1147             PROGRAM_NEXT_INSTRUCTION
1148 
1149             PROGRAM_CASE(CHECK_PREFIX) {
1150                 os << "    queue " << ri->queue << endl;
1151                 os << "    lag " << ri->lag << endl;
1152                 os << "    report " << ri->report << endl;
1153                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1154             }
1155             PROGRAM_NEXT_INSTRUCTION
1156 
1157             PROGRAM_CASE(PUSH_DELAYED) {
1158                 os << "    delay " << u32{ri->delay} << endl;
1159                 os << "    index " << ri->index << endl;
1160             }
1161             PROGRAM_NEXT_INSTRUCTION
1162 
1163             PROGRAM_CASE(DUMMY_NOP) {}
1164             PROGRAM_NEXT_INSTRUCTION
1165 
1166             PROGRAM_CASE(CATCH_UP) {}
1167             PROGRAM_NEXT_INSTRUCTION
1168 
1169             PROGRAM_CASE(CATCH_UP_MPV) {}
1170             PROGRAM_NEXT_INSTRUCTION
1171 
1172             PROGRAM_CASE(SOM_ADJUST) {
1173                 os << "    distance " << ri->distance << endl;
1174             }
1175             PROGRAM_NEXT_INSTRUCTION
1176 
1177             PROGRAM_CASE(SOM_LEFTFIX) {
1178                 os << "    queue " << ri->queue << endl;
1179                 os << "    lag " << ri->lag << endl;
1180             }
1181             PROGRAM_NEXT_INSTRUCTION
1182 
1183             PROGRAM_CASE(SOM_FROM_REPORT) {
1184                 dumpSomOperation(os, ri->som);
1185             }
1186             PROGRAM_NEXT_INSTRUCTION
1187 
1188             PROGRAM_CASE(SOM_ZERO) {}
1189             PROGRAM_NEXT_INSTRUCTION
1190 
1191             PROGRAM_CASE(TRIGGER_INFIX) {
1192                 os << "    queue " << ri->queue << endl;
1193                 os << "    event " << ri->event << endl;
1194                 os << "    cancel " << u32{ri->cancel} << endl;
1195             }
1196             PROGRAM_NEXT_INSTRUCTION
1197 
1198             PROGRAM_CASE(TRIGGER_SUFFIX) {
1199                 os << "    queue " << ri->queue << endl;
1200                 os << "    event " << ri->event << endl;
1201             }
1202             PROGRAM_NEXT_INSTRUCTION
1203 
1204             PROGRAM_CASE(DEDUPE) {
1205                 os << "    quash_som " << u32{ri->quash_som} << endl;
1206                 os << "    dkey " << ri->dkey << endl;
1207                 os << "    offset_adjust " << ri->offset_adjust << endl;
1208                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1209             }
1210             PROGRAM_NEXT_INSTRUCTION
1211 
1212             PROGRAM_CASE(DEDUPE_SOM) {
1213                 os << "    quash_som " << u32{ri->quash_som} << endl;
1214                 os << "    dkey " << ri->dkey << endl;
1215                 os << "    offset_adjust " << ri->offset_adjust << endl;
1216                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1217             }
1218             PROGRAM_NEXT_INSTRUCTION
1219 
1220             PROGRAM_CASE(REPORT_CHAIN) {
1221                 os << "    event " << ri->event << endl;
1222                 os << "    top_squash_distance " << ri->top_squash_distance
1223                    << endl;
1224             }
1225             PROGRAM_NEXT_INSTRUCTION
1226 
1227             PROGRAM_CASE(REPORT_SOM_INT) {
1228                 dumpSomOperation(os, ri->som);
1229             }
1230             PROGRAM_NEXT_INSTRUCTION
1231 
1232             PROGRAM_CASE(REPORT_SOM_AWARE) {
1233                 dumpSomOperation(os, ri->som);
1234             }
1235             PROGRAM_NEXT_INSTRUCTION
1236 
1237             PROGRAM_CASE(REPORT) {
1238                 os << "    onmatch " << ri->onmatch << endl;
1239                 os << "    offset_adjust " << ri->offset_adjust << endl;
1240             }
1241             PROGRAM_NEXT_INSTRUCTION
1242 
1243             PROGRAM_CASE(REPORT_EXHAUST) {
1244                 os << "    onmatch " << ri->onmatch << endl;
1245                 os << "    offset_adjust " << ri->offset_adjust << endl;
1246                 os << "    ekey " << ri->ekey << endl;
1247             }
1248             PROGRAM_NEXT_INSTRUCTION
1249 
1250             PROGRAM_CASE(REPORT_SOM) {
1251                 os << "    onmatch " << ri->onmatch << endl;
1252                 os << "    offset_adjust " << ri->offset_adjust << endl;
1253             }
1254             PROGRAM_NEXT_INSTRUCTION
1255 
1256             PROGRAM_CASE(REPORT_SOM_EXHAUST) {
1257                 os << "    onmatch " << ri->onmatch << endl;
1258                 os << "    offset_adjust " << ri->offset_adjust << endl;
1259                 os << "    ekey " << ri->ekey << endl;
1260             }
1261             PROGRAM_NEXT_INSTRUCTION
1262 
1263             PROGRAM_CASE(DEDUPE_AND_REPORT) {
1264                 os << "    quash_som " << u32{ri->quash_som} << endl;
1265                 os << "    dkey " << ri->dkey << endl;
1266                 os << "    onmatch " << ri->onmatch << endl;
1267                 os << "    offset_adjust " << ri->offset_adjust << endl;
1268                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1269             }
1270             PROGRAM_NEXT_INSTRUCTION
1271 
1272             PROGRAM_CASE(FINAL_REPORT) {
1273                 os << "    onmatch " << ri->onmatch << endl;
1274                 os << "    offset_adjust " << ri->offset_adjust << endl;
1275             }
1276             PROGRAM_NEXT_INSTRUCTION
1277 
1278             PROGRAM_CASE(CHECK_EXHAUSTED) {
1279                 os << "    ekey " << ri->ekey << endl;
1280                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1281             }
1282             PROGRAM_NEXT_INSTRUCTION
1283 
1284             PROGRAM_CASE(CHECK_MIN_LENGTH) {
1285                 os << "    end_adj " << ri->end_adj << endl;
1286                 os << "    min_length " << ri->min_length << endl;
1287                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1288             }
1289             PROGRAM_NEXT_INSTRUCTION
1290 
1291             PROGRAM_CASE(SET_STATE) {
1292                 os << "    index " << ri->index << endl;
1293             }
1294             PROGRAM_NEXT_INSTRUCTION
1295 
1296             PROGRAM_CASE(SET_GROUPS) {
1297                 os << "    groups 0x" << std::hex << ri->groups << std::dec
1298                    << endl;
1299             }
1300             PROGRAM_NEXT_INSTRUCTION
1301 
1302             PROGRAM_CASE(SQUASH_GROUPS) {
1303                 os << "    groups 0x" << std::hex << ri->groups << std::dec
1304                    << endl;
1305             }
1306             PROGRAM_NEXT_INSTRUCTION
1307 
1308             PROGRAM_CASE(CHECK_STATE) {
1309                 os << "    index " << ri->index << endl;
1310                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1311             }
1312             PROGRAM_NEXT_INSTRUCTION
1313 
1314             PROGRAM_CASE(SPARSE_ITER_BEGIN) {
1315                 os << "    iter_offset " << ri->iter_offset << endl;
1316                 os << "    jump_table " << ri->jump_table << endl;
1317                 dumpJumpTable(os, t, ri);
1318                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1319             }
1320             PROGRAM_NEXT_INSTRUCTION
1321 
1322             PROGRAM_CASE(SPARSE_ITER_NEXT) {
1323                 os << "    iter_offset " << ri->iter_offset << endl;
1324                 os << "    jump_table " << ri->jump_table << endl;
1325                 os << "    state " << ri->state << endl;
1326                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1327             }
1328             PROGRAM_NEXT_INSTRUCTION
1329 
1330             PROGRAM_CASE(SPARSE_ITER_ANY) {
1331                 os << "    iter_offset " << ri->iter_offset << endl;
1332                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1333             }
1334             PROGRAM_NEXT_INSTRUCTION
1335 
1336             PROGRAM_CASE(ENGINES_EOD) {
1337                 os << "    iter_offset " << ri->iter_offset << endl;
1338             }
1339             PROGRAM_NEXT_INSTRUCTION
1340 
1341             PROGRAM_CASE(SUFFIXES_EOD) {}
1342             PROGRAM_NEXT_INSTRUCTION
1343 
1344             PROGRAM_CASE(MATCHER_EOD) {}
1345             PROGRAM_NEXT_INSTRUCTION
1346 
1347             PROGRAM_CASE(CHECK_LONG_LIT) {
1348                 os << "    lit_offset " << ri->lit_offset << endl;
1349                 os << "    lit_length " << ri->lit_length << endl;
1350                 const char *lit = (const char *)t + ri->lit_offset;
1351                 os << "    literal: \""
1352                    << escapeString(string(lit, ri->lit_length)) << "\"" << endl;
1353                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1354             }
1355             PROGRAM_NEXT_INSTRUCTION
1356 
1357             PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) {
1358                 os << "    lit_offset " << ri->lit_offset << endl;
1359                 os << "    lit_length " << ri->lit_length << endl;
1360                 const char *lit = (const char *)t + ri->lit_offset;
1361                 os << "    literal: \""
1362                    << escapeString(string(lit, ri->lit_length)) << "\"" << endl;
1363                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1364             }
1365             PROGRAM_NEXT_INSTRUCTION
1366 
1367             PROGRAM_CASE(CHECK_MED_LIT) {
1368                 os << "    lit_offset " << ri->lit_offset << endl;
1369                 os << "    lit_length " << ri->lit_length << endl;
1370                 const char *lit = (const char *)t + ri->lit_offset;
1371                 os << "    literal: \""
1372                    << escapeString(string(lit, ri->lit_length)) << "\"" << endl;
1373                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1374             }
1375             PROGRAM_NEXT_INSTRUCTION
1376 
1377             PROGRAM_CASE(CHECK_MED_LIT_NOCASE) {
1378                 os << "    lit_offset " << ri->lit_offset << endl;
1379                 os << "    lit_length " << ri->lit_length << endl;
1380                 const char *lit = (const char *)t + ri->lit_offset;
1381                 os << "    literal: \""
1382                    << escapeString(string(lit, ri->lit_length)) << "\"" << endl;
1383                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1384             }
1385             PROGRAM_NEXT_INSTRUCTION
1386 
1387             PROGRAM_CASE(CLEAR_WORK_DONE) {}
1388             PROGRAM_NEXT_INSTRUCTION
1389 
1390             PROGRAM_CASE(MULTIPATH_LOOKAROUND) {
1391                 os << "    look_index " << ri->look_index << endl;
1392                 os << "    reach_index " << ri->reach_index << endl;
1393                 os << "    count " << ri->count << endl;
1394                 os << "    last_start " << ri->last_start << endl;
1395                 os << "    start_mask "
1396                    << dumpStrMask(ri->start_mask, sizeof(ri->start_mask))
1397                    << endl;
1398                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1399                 dumpMultipathLookaround(os, t, ri);
1400             }
1401             PROGRAM_NEXT_INSTRUCTION
1402 
1403             PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_16x8) {
1404                 os << "    nib_mask "
1405                    << dumpStrMask(ri->nib_mask, sizeof(ri->nib_mask))
1406                    << endl;
1407                 os << "    bucket_select_mask "
1408                    << dumpStrMask(ri->bucket_select_mask,
1409                                   sizeof(ri->bucket_select_mask))
1410                    << endl;
1411                 os << "    data_select_mask "
1412                    << dumpStrMask(ri->data_select_mask,
1413                                   sizeof(ri->data_select_mask))
1414                    << endl;
1415                 os << "    hi_bits_mask 0x" << std::hex << std::setw(4)
1416                    << std::setfill('0') << ri->hi_bits_mask << std::dec << endl;
1417                 os << "    lo_bits_mask 0x" << std::hex << std::setw(4)
1418                    << std::setfill('0') << ri->lo_bits_mask << std::dec << endl;
1419                 os << "    neg_mask 0x" << std::hex << std::setw(4)
1420                    << std::setfill('0') << ri->neg_mask << std::dec << endl;
1421                 os << "    base_offset " << ri->base_offset << endl;
1422                 os << "    last_start " << ri->last_start << endl;
1423                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1424                 dumpMultipathShufti(os, 16, ri->nib_mask, ri->nib_mask + 16,
1425                                     ri->bucket_select_mask,
1426                                     ri->data_select_mask,
1427                                     ri->neg_mask, ri->base_offset);
1428             }
1429             PROGRAM_NEXT_INSTRUCTION
1430 
1431             PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x8) {
1432                 os << "    hi_mask "
1433                    << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
1434                    << endl;
1435                 os << "    lo_mask "
1436                    << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
1437                    << endl;
1438                 os << "    bucket_select_mask "
1439                    << dumpStrMask(ri->bucket_select_mask,
1440                                   sizeof(ri->bucket_select_mask))
1441                    << endl;
1442                 os << "    data_select_mask "
1443                    << dumpStrMask(ri->data_select_mask,
1444                                   sizeof(ri->data_select_mask))
1445                    << endl;
1446                 os << "    hi_bits_mask 0x" << std::hex << std::setw(8)
1447                    << std::setfill('0') << ri->hi_bits_mask << std::dec << endl;
1448                 os << "    lo_bits_mask 0x" << std::hex << std::setw(8)
1449                    << std::setfill('0') << ri->lo_bits_mask << std::dec << endl;
1450                 os << "    neg_mask 0x" << std::hex << std::setw(8)
1451                    << std::setfill('0') << ri->neg_mask << std::dec << endl;
1452                 os << "    base_offset " << ri->base_offset << endl;
1453                 os << "    last_start " << ri->last_start << endl;
1454                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1455                 dumpMultipathShufti(os, 32, ri->lo_mask, ri->hi_mask,
1456                                     ri->bucket_select_mask,
1457                                     ri->data_select_mask,
1458                                     ri->neg_mask, ri->base_offset);
1459             }
1460             PROGRAM_NEXT_INSTRUCTION
1461 
1462             PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x16) {
1463                 os << "    hi_mask "
1464                    << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
1465                    << endl;
1466                 os << "    lo_mask "
1467                    << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
1468                    << endl;
1469                 os << "    bucket_select_mask_hi "
1470                    << dumpStrMask(ri->bucket_select_mask_hi,
1471                                   sizeof(ri->bucket_select_mask_hi))
1472                    << endl;
1473                 os << "    bucket_select_mask_lo "
1474                    << dumpStrMask(ri->bucket_select_mask_lo,
1475                                   sizeof(ri->bucket_select_mask_lo))
1476                    << endl;
1477                 os << "    data_select_mask "
1478                    << dumpStrMask(ri->data_select_mask,
1479                                   sizeof(ri->data_select_mask))
1480                    << endl;
1481                 os << "    hi_bits_mask 0x" << std::hex << std::setw(8)
1482                    << std::setfill('0') << ri->hi_bits_mask << std::dec << endl;
1483                 os << "    lo_bits_mask 0x" << std::hex << std::setw(8)
1484                    << std::setfill('0') << ri->lo_bits_mask << std::dec << endl;
1485                 os << "    neg_mask 0x" << std::hex << std::setw(8)
1486                    << std::setfill('0') << ri->neg_mask << std::dec << endl;
1487                 os << "    base_offset " << ri->base_offset << endl;
1488                 os << "    last_start " << ri->last_start << endl;
1489                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1490                 dumpMultipathShufti(os, 32, ri->lo_mask, ri->hi_mask,
1491                                     ri->lo_mask + 16, ri->hi_mask + 16,
1492                                     ri->bucket_select_mask_lo,
1493                                     ri->bucket_select_mask_hi,
1494                                     ri->data_select_mask,
1495                                     ri->neg_mask, ri->base_offset);
1496             }
1497             PROGRAM_NEXT_INSTRUCTION
1498 
1499             PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_64) {
1500                 os << "    hi_mask "
1501                    << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
1502                    << endl;
1503                 os << "    lo_mask "
1504                    << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
1505                    << endl;
1506                 os << "    bucket_select_mask "
1507                    << dumpStrMask(ri->bucket_select_mask,
1508                                   sizeof(ri->bucket_select_mask))
1509                    << endl;
1510                 os << "    data_select_mask "
1511                    << dumpStrMask(ri->data_select_mask,
1512                                   sizeof(ri->data_select_mask))
1513                    << endl;
1514                 os << "    hi_bits_mask 0x" << std::hex << std::setw(16)
1515                    << std::setfill('0') << ri->hi_bits_mask << std::dec << endl;
1516                 os << "    lo_bits_mask 0x" << std::hex << std::setw(16)
1517                    << std::setfill('0') << ri->lo_bits_mask << std::dec << endl;
1518                 os << "    neg_mask 0x" << std::hex << std::setw(16)
1519                    << std::setfill('0') << ri->neg_mask << std::dec << endl;
1520                 os << "    base_offset " << ri->base_offset << endl;
1521                 os << "    last_start " << ri->last_start << endl;
1522                 os << "    fail_jump " << offset + ri->fail_jump << endl;
1523                 dumpMultipathShufti(os, 64, ri->lo_mask, ri->hi_mask,
1524                                     ri->bucket_select_mask,
1525                                     ri->data_select_mask,
1526                                     ri->neg_mask, ri->base_offset);
1527             }
1528             PROGRAM_NEXT_INSTRUCTION
1529 
1530             PROGRAM_CASE(INCLUDED_JUMP) {
1531                 os << "    child_offset " << ri->child_offset << endl;
1532                 os << "    squash " << (u32)ri->squash << endl;
1533             }
1534             PROGRAM_NEXT_INSTRUCTION
1535 
1536             PROGRAM_CASE(SET_LOGICAL) {
1537                 os << "    lkey " << ri->lkey << endl;
1538                 os << "    offset_adjust " << ri->offset_adjust << endl;
1539             }
1540             PROGRAM_NEXT_INSTRUCTION
1541 
1542             PROGRAM_CASE(SET_COMBINATION) {
1543                 os << "    ckey " << ri->ckey << endl;
1544             }
1545             PROGRAM_NEXT_INSTRUCTION
1546 
1547             PROGRAM_CASE(FLUSH_COMBINATION) {}
1548             PROGRAM_NEXT_INSTRUCTION
1549 
1550             PROGRAM_CASE(SET_EXHAUST) {
1551                 os << "    ekey " << ri->ekey << endl;
1552             }
1553             PROGRAM_NEXT_INSTRUCTION
1554 
1555             PROGRAM_CASE(LAST_FLUSH_COMBINATION) {}
1556             PROGRAM_NEXT_INSTRUCTION
1557 
1558         default:
1559             os << "  UNKNOWN (code " << int{code} << ")" << endl;
1560             os << "  <stopping>" << endl;
1561             return;
1562         }
1563     }
1564 }
1565 
1566 #undef PROGRAM_CASE
1567 #undef PROGRAM_NEXT_INSTRUCTION
1568 
1569 static
dumpRoseLitPrograms(const vector<LitFragment> & fragments,const RoseEngine * t,const string & filename)1570 void dumpRoseLitPrograms(const vector<LitFragment> &fragments,
1571                          const RoseEngine *t, const string &filename) {
1572     ofstream os(filename);
1573 
1574     // Collect all programs referenced by a literal fragment.
1575     vector<u32> programs;
1576     for (const auto &frag : fragments) {
1577         if (frag.lit_program_offset) {
1578             programs.push_back(frag.lit_program_offset);
1579         }
1580         if (frag.delay_program_offset) {
1581             programs.push_back(frag.delay_program_offset);
1582         }
1583     }
1584     sort_and_unique(programs);
1585 
1586     for (u32 prog_offset : programs) {
1587         os << "Program @ " << prog_offset << ":" << endl;
1588         const char *prog = (const char *)loadFromByteCodeOffset(t, prog_offset);
1589         dumpProgram(os, t, prog);
1590         os << endl;
1591     }
1592 
1593     os.close();
1594 }
1595 
1596 static
dumpRoseEodPrograms(const RoseEngine * t,const string & filename)1597 void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) {
1598     ofstream os(filename);
1599     const char *base = (const char *)t;
1600 
1601     if (t->eodProgramOffset) {
1602         os << "EOD Program @ " << t->eodProgramOffset << ":" << endl;
1603         dumpProgram(os, t, base + t->eodProgramOffset);
1604         os << endl;
1605     } else {
1606         os << "<No EOD Program>" << endl;
1607     }
1608 
1609     os.close();
1610 }
1611 
1612 static
dumpRoseFlushCombPrograms(const RoseEngine * t,const string & filename)1613 void dumpRoseFlushCombPrograms(const RoseEngine *t, const string &filename) {
1614     ofstream os(filename);
1615     const char *base = (const char *)t;
1616 
1617     if (t->flushCombProgramOffset) {
1618         os << "Flush Combination Program @ " << t->flushCombProgramOffset
1619            << ":" << endl;
1620         dumpProgram(os, t, base + t->flushCombProgramOffset);
1621         os << endl;
1622     } else {
1623         os << "<No Flush Combination Program>" << endl;
1624     }
1625 
1626     os.close();
1627 }
1628 
1629 static
dumpRoseLastFlushCombPrograms(const RoseEngine * t,const string & filename)1630 void dumpRoseLastFlushCombPrograms(const RoseEngine *t,
1631                                    const string &filename) {
1632     ofstream os(filename);
1633     const char *base = (const char *)t;
1634 
1635     if (t->lastFlushCombProgramOffset) {
1636         os << "Last Flush Combination Program @ "
1637            << t->lastFlushCombProgramOffset
1638            << ":" << endl;
1639         dumpProgram(os, t, base + t->lastFlushCombProgramOffset);
1640         os << endl;
1641     } else {
1642         os << "<No Last Flush Combination Program>" << endl;
1643     }
1644 
1645     os.close();
1646 }
1647 
1648 static
dumpRoseReportPrograms(const RoseEngine * t,const string & filename)1649 void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) {
1650     ofstream os(filename);
1651 
1652     const u32 *programs =
1653         (const u32 *)loadFromByteCodeOffset(t, t->reportProgramOffset);
1654 
1655     for (u32 i = 0; i < t->reportProgramCount; i++) {
1656         os << "Report " << i << endl;
1657         os << "---------------" << endl;
1658 
1659         if (programs[i]) {
1660             os << "Program @ " << programs[i] << ":" << endl;
1661             const char *prog =
1662                 (const char *)loadFromByteCodeOffset(t, programs[i]);
1663             dumpProgram(os, t, prog);
1664         } else {
1665             os << "<No Program>" << endl;
1666         }
1667     }
1668 
1669     os.close();
1670 }
1671 
1672 static
dumpRoseAnchoredPrograms(const RoseEngine * t,const string & filename)1673 void dumpRoseAnchoredPrograms(const RoseEngine *t, const string &filename) {
1674     ofstream os(filename);
1675 
1676     const u32 *programs =
1677         (const u32 *)loadFromByteCodeOffset(t, t->anchoredProgramOffset);
1678 
1679     for (u32 i = 0; i < t->anchored_count; i++) {
1680         os << "Anchored entry " << i << endl;
1681         os << "---------------" << endl;
1682 
1683         if (programs[i]) {
1684             os << "Program @ " << programs[i] << ":" << endl;
1685             const char *prog =
1686                 (const char *)loadFromByteCodeOffset(t, programs[i]);
1687             dumpProgram(os, t, prog);
1688         } else {
1689             os << "<No Program>" << endl;
1690         }
1691         os << endl;
1692     }
1693 
1694     os.close();
1695 }
1696 
1697 static
dumpRoseDelayPrograms(const RoseEngine * t,const string & filename)1698 void dumpRoseDelayPrograms(const RoseEngine *t, const string &filename) {
1699     ofstream os(filename);
1700 
1701     const u32 *programs =
1702         (const u32 *)loadFromByteCodeOffset(t, t->delayProgramOffset);
1703 
1704     for (u32 i = 0; i < t->delay_count; i++) {
1705         os << "Delay entry " << i << endl;
1706         os << "---------------" << endl;
1707 
1708         if (programs[i]) {
1709             os << "Program @ " << programs[i] << ":" << endl;
1710             const char *prog =
1711                 (const char *)loadFromByteCodeOffset(t, programs[i]);
1712             dumpProgram(os, t, prog);
1713         } else {
1714             os << "<No Program>" << endl;
1715         }
1716         os << endl;
1717     }
1718 
1719     os.close();
1720 }
1721 
1722 static
dumpNfaNotes(ofstream & fout,const RoseEngine * t,const NFA * n)1723 void dumpNfaNotes(ofstream &fout, const RoseEngine *t, const NFA *n) {
1724     const u32 qindex = n->queueIndex;
1725 
1726     if (qindex < t->outfixBeginQueue) {
1727         fout << "chained";
1728         return;
1729     }
1730 
1731     if (qindex < t->outfixEndQueue) {
1732         fout << "outfix";
1733         return;
1734     }
1735 
1736     const NfaInfo *nfa_info = getNfaInfoByQueue(t, qindex);
1737     const NFA *nfa = getNfaByInfo(t, nfa_info);
1738 
1739     if (nfa_info->eod) {
1740         fout << "eod ";
1741     }
1742 
1743     if (qindex < t->leftfixBeginQueue) {
1744         fout << "suffix";
1745         return;
1746     }
1747 
1748     const LeftNfaInfo *left = getLeftInfoByQueue(t, qindex);
1749     if (left->eager) {
1750         fout << "eager ";
1751     }
1752     if (left->transient) {
1753         fout << "transient " << (u32)left->transient << " ";
1754     }
1755     if (left->infix) {
1756         fout << "infix";
1757         u32 maxQueueLen = left->maxQueueLen;
1758         if (maxQueueLen != (u32)(-1)) {
1759             fout << " maxqlen=" << maxQueueLen;
1760         }
1761     } else {
1762         fout << "prefix";
1763     }
1764     fout << " maxlag=" << left->maxLag;
1765     if (left->stopTable) {
1766         fout << " miracles";
1767     }
1768     if (left->countingMiracleOffset) {
1769         const RoseCountingMiracle *cm
1770             = (const RoseCountingMiracle *)((const char *)t
1771                                             + left->countingMiracleOffset);
1772         fout << " counting_miracle:" << (int)cm->count
1773              << (cm->shufti ? "s" : "v");
1774     }
1775     if (nfaSupportsZombie(nfa)) {
1776         fout << " zombie";
1777     }
1778     if (left->eod_check) {
1779         fout << " eod";
1780     }
1781 }
1782 
1783 static
dumpComponentInfo(const RoseEngine * t,const string & base)1784 void dumpComponentInfo(const RoseEngine *t, const string &base) {
1785     stringstream ss;
1786     ss << base << "rose_components.txt";
1787     ofstream fout(ss.str().c_str());
1788 
1789     fout << "Index  Offset\tEngine               \tStates S.State Bytes   Notes\n";
1790 
1791     for (u32 i = 0; i < t->queueCount; i++) {
1792         const NfaInfo *nfa_info = getNfaInfoByQueue(t, i);
1793         const NFA *n = getNfaByInfo(t, nfa_info);
1794 
1795         fout << left << setw(6) << i << " ";
1796 
1797         fout << left << ((const char *)n - (const char *)t) << "\t"; /* offset */
1798 
1799         fout << left << setw(16) << describe(*n) << "\t";
1800 
1801         fout << left << setw(6) << n->nPositions << " ";
1802         fout << left << setw(7) << n->streamStateSize << " ";
1803         fout << left << setw(7) << n->length << " ";
1804 
1805         dumpNfaNotes(fout, t, n);
1806 
1807         fout << endl;
1808     }
1809 }
1810 
1811 static
dumpComponentInfoCsv(const RoseEngine * t,const string & base)1812 void dumpComponentInfoCsv(const RoseEngine *t, const string &base) {
1813     StdioFile f(base + "/rose_components.csv", "w");
1814 
1815     fprintf(f, "Index, Offset,Engine Type,States,Stream State,"
1816                "Bytecode Size,Kind,Notes\n");
1817 
1818     for (u32 i = 0; i < t->queueCount; i++) {
1819         const NfaInfo *nfa_info = getNfaInfoByQueue(t, i);
1820         const NFA *n = getNfaByInfo(t, nfa_info);
1821         nfa_kind kind;
1822         stringstream notes;
1823 
1824         if (i < t->outfixBeginQueue) {
1825             notes << "chained;";
1826         }
1827 
1828         if (nfa_info->eod) {
1829             notes << "eod;";
1830         }
1831 
1832         if (i < t->outfixEndQueue) {
1833             kind = NFA_OUTFIX;
1834         } else if (i < t->leftfixBeginQueue) {
1835             kind = NFA_SUFFIX;
1836         } else {
1837             const LeftNfaInfo *left = getLeftInfoByQueue(t, i);
1838             if (left->eager) {
1839                 notes << "eager;";
1840             }
1841             if (left->transient) {
1842                 notes << "transient " << (u32)left->transient << ";";
1843             }
1844             if (left->infix) {
1845                 kind = NFA_INFIX;
1846                 u32 maxQueueLen = left->maxQueueLen;
1847                 if (maxQueueLen != (u32)(-1)) {
1848                     notes << "maxqlen=" << maxQueueLen << ";";
1849                 }
1850             } else {
1851                 kind = NFA_PREFIX;
1852             }
1853             notes << "maxlag=" << left->maxLag << ";";
1854             if (left->stopTable) {
1855                 notes << "miracles;";
1856             }
1857             if (left->countingMiracleOffset) {
1858                 auto cm = (const RoseCountingMiracle *)
1859                     ((const char *)t + left->countingMiracleOffset);
1860                 notes << "counting_miracle:" << (int)cm->count
1861                       << (cm->shufti ? "s" : "v") << ";";
1862             }
1863             if (nfaSupportsZombie(n)) {
1864                 notes << " zombie;";
1865             }
1866             if (left->eod_check) {
1867             notes << "left_eod;";
1868             }
1869         }
1870 
1871         fprintf(f, "%u,%zd,\"%s\",%u,%u,%u,%s,%s\n", i,
1872                 (const char *)n - (const char *)t, describe(*n).c_str(),
1873                 n->nPositions, n->streamStateSize, n->length,
1874                 to_string(kind).c_str(), notes.str().c_str());
1875     }
1876 }
1877 
1878 static
dumpExhaust(const RoseEngine * t,const string & base)1879 void dumpExhaust(const RoseEngine *t, const string &base) {
1880     StdioFile f(base + "/rose_exhaust.csv", "w");
1881 
1882     const NfaInfo *infos
1883         = (const NfaInfo *)((const char *)t + t->nfaInfoOffset);
1884 
1885     u32 queue_count = t->activeArrayCount;
1886 
1887     for (u32 i = 0; i < queue_count; ++i) {
1888         u32 ekey_offset = infos[i].ekeyListOffset;
1889 
1890         fprintf(f, "%u (%u):", i, ekey_offset);
1891 
1892         if (ekey_offset) {
1893             const u32 *ekeys = (const u32 *)((const char *)t + ekey_offset);
1894             while (1) {
1895                 u32 e = *ekeys;
1896                 ++ekeys;
1897                 if (e == ~0U) {
1898                     break;
1899                 }
1900                 fprintf(f, " %u", e);
1901             }
1902         }
1903 
1904         fprintf(f, "\n");
1905     }
1906 }
1907 
1908 static
dumpNfas(const RoseEngine * t,bool dump_raw,const string & base)1909 void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) {
1910     dumpExhaust(t, base);
1911 
1912     for (u32 i = 0; i < t->queueCount; i++) {
1913         const NfaInfo *nfa_info = getNfaInfoByQueue(t, i);
1914         const NFA *n = getNfaByInfo(t, nfa_info);
1915 
1916         stringstream ssbase;
1917         ssbase << base << "rose_nfa_" << i;
1918         nfaGenerateDumpFiles(n, ssbase.str());
1919 
1920         if (dump_raw) {
1921             stringstream ssraw;
1922             ssraw << base << "rose_nfa_" << i << ".raw";
1923             StdioFile f(ssraw.str(), "w");
1924             fwrite(n, 1, n->length, f);
1925         }
1926     }
1927 }
1928 
1929 static
dumpRevComponentInfo(const RoseEngine * t,const string & base)1930 void dumpRevComponentInfo(const RoseEngine *t, const string &base) {
1931     stringstream ss;
1932     ss << base << "som_rev_components.txt";
1933     ofstream fout(ss.str().c_str());
1934 
1935     fout << "Index  Offset\tEngine               \tStates S.State Bytes\n";
1936 
1937     const char *tp = (const char *)t;
1938     const u32 *rev_offsets = (const u32 *)(tp + t->somRevOffsetOffset);
1939 
1940     for (u32 i = 0; i < t->somRevCount; i++) {
1941         u32 offset = rev_offsets[i];
1942         const NFA *n = (const NFA *)(tp + offset);
1943 
1944         fout << left << setw(6) << i << " ";
1945 
1946         fout << left << offset << "\t"; /* offset */
1947 
1948         fout << left << setw(16) << describe(*n) << "\t";
1949 
1950         fout << left << setw(6) << n->nPositions << " ";
1951         fout << left << setw(7) << n->streamStateSize << " ";
1952         fout << left << setw(7) << n->length;
1953         fout << endl;
1954     }
1955 }
1956 
1957 static
dumpRevNfas(const RoseEngine * t,bool dump_raw,const string & base)1958 void dumpRevNfas(const RoseEngine *t, bool dump_raw, const string &base) {
1959     const char *tp = (const char *)t;
1960     const u32 *rev_offsets = (const u32 *)(tp + t->somRevOffsetOffset);
1961 
1962     for (u32 i = 0; i < t->somRevCount; i++) {
1963         const NFA *n = (const NFA *)(tp + rev_offsets[i]);
1964 
1965         stringstream ssbase;
1966         ssbase << base << "som_rev_nfa_" << i;
1967         nfaGenerateDumpFiles(n, ssbase.str());
1968 
1969         if (dump_raw) {
1970             stringstream ssraw;
1971             ssraw << base << "som_rev_nfa_" << i << ".raw";
1972             StdioFile f(ssraw.str(), "w");
1973             fwrite(n, 1, n->length, f);
1974         }
1975     }
1976 }
1977 
1978 static
dumpAnchored(const RoseEngine * t,const string & base)1979 void dumpAnchored(const RoseEngine *t, const string &base) {
1980     u32 i = 0;
1981     const anchored_matcher_info *curr
1982         = (const anchored_matcher_info *)getALiteralMatcher(t);
1983 
1984     while (curr) {
1985         const NFA *n = (const NFA *)((const char *)curr + sizeof(*curr));
1986 
1987         stringstream ssbase;
1988         ssbase << base << "anchored_" << i;
1989         nfaGenerateDumpFiles(n, ssbase.str());
1990 
1991         curr = curr->next_offset ? (const anchored_matcher_info *)
1992             ((const char *)curr + curr->next_offset) : nullptr;
1993         i++;
1994     };
1995 }
1996 
1997 static
dumpAnchoredStats(const void * atable,FILE * f)1998 void dumpAnchoredStats(const void *atable, FILE *f) {
1999     assert(atable);
2000 
2001     u32 i = 0;
2002     const anchored_matcher_info *curr = (const anchored_matcher_info *)atable;
2003 
2004     while (curr) {
2005         const NFA *n = (const NFA *)((const char *)curr + sizeof(*curr));
2006 
2007         fprintf(f, "  NFA %u: %s, %u states (%u bytes)\n", i,
2008                 describe(*n).c_str(), n->nPositions, n->length);
2009 
2010         curr = curr->next_offset ? (const anchored_matcher_info *)
2011             ((const char *)curr + curr->next_offset) : nullptr;
2012         i++;
2013     };
2014 
2015 }
2016 
2017 static
dumpLongLiteralSubtable(const RoseLongLitTable * ll_table,const RoseLongLitSubtable * ll_sub,FILE * f)2018 void dumpLongLiteralSubtable(const RoseLongLitTable *ll_table,
2019                              const RoseLongLitSubtable *ll_sub, FILE *f) {
2020     if (!ll_sub->hashBits) {
2021         fprintf(f, "      <no table>\n");
2022         return;
2023     }
2024 
2025     const char *base = (const char *)ll_table;
2026 
2027     u32 nbits = ll_sub->hashBits;
2028     u32 num_entries = 1U << nbits;
2029     const auto *tab = (const RoseLongLitHashEntry *)(base + ll_sub->hashOffset);
2030     u32 hash_occ =
2031         count_if(tab, tab + num_entries, [](const RoseLongLitHashEntry &ent) {
2032             return ent.str_offset != 0;
2033         });
2034     float hash_occ_percent = ((float)hash_occ / (float)num_entries) * 100;
2035 
2036     fprintf(f, "      hash table   : %u bits, occupancy %u/%u (%0.1f%%)\n",
2037             nbits, hash_occ, num_entries, hash_occ_percent);
2038 
2039     u32 bloom_bits = ll_sub->bloomBits;
2040     u32 bloom_size = 1U << bloom_bits;
2041     const u8 *bloom = (const u8 *)base + ll_sub->bloomOffset;
2042     u32 bloom_occ = accumulate(bloom, bloom + bloom_size / 8, 0,
2043         [](const u32 &sum, const u8 &elem) { return sum + popcount32(elem); });
2044     float bloom_occ_percent = ((float)bloom_occ / (float)(bloom_size)) * 100;
2045 
2046     fprintf(f, "      bloom filter : %u bits, occupancy %u/%u (%0.1f%%)\n",
2047             bloom_bits, bloom_occ, bloom_size, bloom_occ_percent);
2048 }
2049 
2050 static
dumpLongLiteralTable(const RoseEngine * t,FILE * f)2051 void dumpLongLiteralTable(const RoseEngine *t, FILE *f) {
2052     if (!t->longLitTableOffset) {
2053         return;
2054     }
2055 
2056     fprintf(f, "\n");
2057     fprintf(f, "Long literal table (streaming):\n");
2058 
2059     const auto *ll_table =
2060         (const struct RoseLongLitTable *)loadFromByteCodeOffset(
2061             t, t->longLitTableOffset);
2062 
2063     fprintf(f, "    total size     : %u bytes\n", ll_table->size);
2064     fprintf(f, "    longest len    : %u\n", ll_table->maxLen);
2065     fprintf(f, "    stream state   : %u bytes\n", ll_table->streamStateBytes);
2066 
2067     fprintf(f, "    caseful:\n");
2068     dumpLongLiteralSubtable(ll_table, &ll_table->caseful, f);
2069 
2070     fprintf(f, "    nocase:\n");
2071     dumpLongLiteralSubtable(ll_table, &ll_table->nocase, f);
2072 }
2073 
2074 static
roseDumpText(const RoseEngine * t,FILE * f)2075 void roseDumpText(const RoseEngine *t, FILE *f) {
2076     if (!t) {
2077         fprintf(f, "<< no rose >>\n");
2078         return;
2079     }
2080 
2081     const void *atable = getAnchoredMatcher(t);
2082     const HWLM *ftable = getFloatingMatcher(t);
2083     const HWLM *drtable = getDelayRebuildMatcher(t);
2084     const HWLM *etable = getEodMatcher(t);
2085     const HWLM *sbtable = getSmallBlockMatcher(t);
2086 
2087     fprintf(f, "Rose:\n\n");
2088 
2089     fprintf(f, "mode:                : ");
2090     switch(t->mode) {
2091     case HS_MODE_BLOCK:
2092         fprintf(f, "block");
2093         break;
2094     case HS_MODE_STREAM:
2095         fprintf(f, "streaming");
2096         break;
2097     case HS_MODE_VECTORED:
2098         fprintf(f, "vectored");
2099         break;
2100     }
2101     fprintf(f, "\n");
2102 
2103     fprintf(f, "properties           :");
2104     if (t->canExhaust) {
2105         fprintf(f, " canExhaust");
2106     }
2107     if (t->hasSom) {
2108         fprintf(f, " hasSom");
2109     }
2110     if (t->runtimeImpl == ROSE_RUNTIME_PURE_LITERAL) {
2111         fprintf(f, " pureLiteral");
2112     }
2113     if (t->runtimeImpl == ROSE_RUNTIME_SINGLE_OUTFIX) {
2114         fprintf(f, " soleOutfix");
2115     }
2116     fprintf(f, "\n");
2117 
2118     fprintf(f, "dkey count           : %u\n", t->dkeyCount);
2119     fprintf(f, "som slot count       : %u\n", t->somLocationCount);
2120     fprintf(f, "som width            : %u bytes\n", t->somHorizon);
2121     fprintf(f, "rose count           : %u\n", t->roseCount);
2122     fprintf(f, "\n");
2123 
2124     fprintf(f, "total engine size    : %u bytes\n", t->size);
2125     fprintf(f, " - anchored matcher  : %u bytes over %u bytes\n", t->asize,
2126             t->anchoredDistance);
2127     fprintf(f, " - floating matcher  : %zu bytes%s",
2128             ftable ? hwlmSize(ftable) : 0, t->noFloatingRoots ? " (cond)":"");
2129     if (t->floatingMinDistance) {
2130         fprintf(f, " from %s bytes\n",
2131                 rose_off(t->floatingMinDistance).str().c_str());
2132     }
2133     if (t->floatingDistance != ROSE_BOUND_INF && ftable) {
2134         fprintf(f, " over %u bytes\n", t->floatingDistance);
2135     } else {
2136         fprintf(f, "\n");
2137     }
2138     fprintf(f, " - delay-rb matcher  : %zu bytes\n",
2139             drtable ? hwlmSize(drtable) : 0);
2140     fprintf(f, " - eod-anch matcher  : %zu bytes over last %u bytes\n",
2141             etable ? hwlmSize(etable) : 0, t->ematcherRegionSize);
2142     fprintf(f, " - small-blk matcher : %zu bytes over %u bytes\n",
2143             sbtable ? hwlmSize(sbtable) : 0, t->smallBlockDistance);
2144     fprintf(f, " - role state table  : %zu bytes\n",
2145             t->rolesWithStateCount * sizeof(u32));
2146     fprintf(f, " - nfa info table    : %zu bytes\n",
2147             t->queueCount * sizeof(NfaInfo));
2148 
2149     fprintf(f, "state space required : %u bytes\n", t->stateOffsets.end);
2150     fprintf(f, " - history buffer    : %u bytes\n", t->historyRequired);
2151     fprintf(f, " - exhaustion vector : %u bytes\n",
2152             t->stateOffsets.exhausted_size);
2153     fprintf(f, " - logical vector    : %u bytes\n",
2154             t->stateOffsets.logicalVec_size);
2155     fprintf(f, " - combination vector: %u bytes\n",
2156             t->stateOffsets.combVec_size);
2157     fprintf(f, " - role state mmbit  : %u bytes\n", t->stateSize);
2158     fprintf(f, " - long lit matcher  : %u bytes\n", t->longLitStreamState);
2159     fprintf(f, " - active array      : %u bytes\n",
2160             t->stateOffsets.activeLeafArray_size);
2161     fprintf(f, " - active rose       : %u bytes\n",
2162             t->stateOffsets.activeLeftArray_size);
2163     fprintf(f, " - anchored state    : %u bytes\n", t->anchorStateSize);
2164     fprintf(f, " - nfa state         : %u bytes\n",
2165             t->stateOffsets.end - t->stateOffsets.nfaStateBegin);
2166     fprintf(f, " - (trans. nfa state): %u bytes\n", t->tStateSize);
2167     fprintf(f, " - one whole bytes   : %u bytes\n",
2168             t->stateOffsets.anchorState - t->stateOffsets.leftfixLagTable);
2169     fprintf(f, " - groups            : %u bytes\n",
2170             t->stateOffsets.groups_size);
2171     fprintf(f, "\n");
2172 
2173     fprintf(f, "initial groups       : 0x%016llx\n", t->initialGroups);
2174     fprintf(f, "floating groups      : 0x%016llx\n", t->floating_group_mask);
2175     fprintf(f, "handled key count    : %u\n", t->handledKeyCount);
2176     fprintf(f, "\n");
2177 
2178     fprintf(f, "total literal count  : %u\n", t->totalNumLiterals);
2179     fprintf(f, "  delayed literals   : %u\n", t->delay_count);
2180 
2181     fprintf(f, "\n");
2182     fprintf(f, "  minWidth                    : %u\n", t->minWidth);
2183     fprintf(f, "  minWidthExcludingBoundaries : %u\n",
2184             t->minWidthExcludingBoundaries);
2185     fprintf(f, "  maxBiAnchoredWidth          : %s\n",
2186             rose_off(t->maxBiAnchoredWidth).str().c_str());
2187     fprintf(f, "  minFloatLitMatchOffset      : %s\n",
2188             rose_off(t->floatingMinLiteralMatchOffset).str().c_str());
2189     fprintf(f, "  maxFloatingDelayedMatch     : %s\n",
2190             rose_off(t->maxFloatingDelayedMatch).str().c_str());
2191 
2192     if (atable) {
2193         fprintf(f, "\nAnchored literal matcher stats:\n\n");
2194         dumpAnchoredStats(atable, f);
2195     }
2196 
2197     dumpLongLiteralTable(t, f);
2198 }
2199 
/* Field printers used by roseDumpStructRaw(): each prints one member of the
 * pointed-to object as "<name>: <decimal>/<hex>". They write to a variable
 * named f that must be in scope at the point of use. */
#define DUMP_U8(obj, field)                                             \
    fprintf(f, "    %-32s: %hhu/%hhx\n", #field, obj->field, obj->field)
#define DUMP_U32(obj, field)                                            \
    fprintf(f, "    %-32s: %u/%08x\n", #field, obj->field, obj->field)
#define DUMP_U64(obj, field)                                            \
    fprintf(f, "    %-32s: %llu/%016llx\n", #field, obj->field, obj->field)
2206 
2207 static
roseDumpStructRaw(const RoseEngine * t,FILE * f)2208 void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
2209     fprintf(f, "struct RoseEngine {\n");
2210     DUMP_U8(t, noFloatingRoots);
2211     DUMP_U8(t, requiresEodCheck);
2212     DUMP_U8(t, hasOutfixesInSmallBlock);
2213     DUMP_U8(t, runtimeImpl);
2214     DUMP_U8(t, mpvTriggeredByLeaf);
2215     DUMP_U8(t, canExhaust);
2216     DUMP_U8(t, hasSom);
2217     DUMP_U8(t, somHorizon);
2218     DUMP_U32(t, mode);
2219     DUMP_U32(t, historyRequired);
2220     DUMP_U32(t, ekeyCount);
2221     DUMP_U32(t, lkeyCount);
2222     DUMP_U32(t, lopCount);
2223     DUMP_U32(t, ckeyCount);
2224     DUMP_U32(t, logicalTreeOffset);
2225     DUMP_U32(t, combInfoMapOffset);
2226     DUMP_U32(t, dkeyCount);
2227     DUMP_U32(t, dkeyLogSize);
2228     DUMP_U32(t, invDkeyOffset);
2229     DUMP_U32(t, somLocationCount);
2230     DUMP_U32(t, somLocationFatbitSize);
2231     DUMP_U32(t, rolesWithStateCount);
2232     DUMP_U32(t, stateSize);
2233     DUMP_U32(t, anchorStateSize);
2234     DUMP_U32(t, tStateSize);
2235     DUMP_U32(t, smallWriteOffset);
2236     DUMP_U32(t, amatcherOffset);
2237     DUMP_U32(t, ematcherOffset);
2238     DUMP_U32(t, fmatcherOffset);
2239     DUMP_U32(t, drmatcherOffset);
2240     DUMP_U32(t, sbmatcherOffset);
2241     DUMP_U32(t, longLitTableOffset);
2242     DUMP_U32(t, amatcherMinWidth);
2243     DUMP_U32(t, fmatcherMinWidth);
2244     DUMP_U32(t, eodmatcherMinWidth);
2245     DUMP_U32(t, amatcherMaxBiAnchoredWidth);
2246     DUMP_U32(t, fmatcherMaxBiAnchoredWidth);
2247     DUMP_U32(t, reportProgramOffset);
2248     DUMP_U32(t, reportProgramCount);
2249     DUMP_U32(t, delayProgramOffset);
2250     DUMP_U32(t, anchoredProgramOffset);
2251     DUMP_U32(t, activeArrayCount);
2252     DUMP_U32(t, activeLeftCount);
2253     DUMP_U32(t, queueCount);
2254     DUMP_U32(t, activeQueueArraySize);
2255     DUMP_U32(t, eagerIterOffset);
2256     DUMP_U32(t, handledKeyCount);
2257     DUMP_U32(t, handledKeyFatbitSize);
2258     DUMP_U32(t, leftOffset);
2259     DUMP_U32(t, roseCount);
2260     DUMP_U32(t, eodProgramOffset);
2261     DUMP_U32(t, flushCombProgramOffset);
2262     DUMP_U32(t, lastByteHistoryIterOffset);
2263     DUMP_U32(t, minWidth);
2264     DUMP_U32(t, minWidthExcludingBoundaries);
2265     DUMP_U32(t, maxBiAnchoredWidth);
2266     DUMP_U32(t, anchoredDistance);
2267     DUMP_U32(t, anchoredMinDistance);
2268     DUMP_U32(t, floatingDistance);
2269     DUMP_U32(t, floatingMinDistance);
2270     DUMP_U32(t, smallBlockDistance);
2271     DUMP_U32(t, floatingMinLiteralMatchOffset);
2272     DUMP_U32(t, nfaInfoOffset);
2273     DUMP_U64(t, initialGroups);
2274     DUMP_U64(t, floating_group_mask);
2275     DUMP_U32(t, size);
2276     DUMP_U32(t, delay_count);
2277     DUMP_U32(t, delay_fatbit_size);
2278     DUMP_U32(t, anchored_count);
2279     DUMP_U32(t, anchored_fatbit_size);
2280     DUMP_U32(t, maxFloatingDelayedMatch);
2281     DUMP_U32(t, delayRebuildLength);
2282     DUMP_U32(t, stateOffsets.history);
2283     DUMP_U32(t, stateOffsets.exhausted);
2284     DUMP_U32(t, stateOffsets.exhausted_size);
2285     DUMP_U32(t, stateOffsets.logicalVec);
2286     DUMP_U32(t, stateOffsets.logicalVec_size);
2287     DUMP_U32(t, stateOffsets.combVec);
2288     DUMP_U32(t, stateOffsets.combVec_size);
2289     DUMP_U32(t, stateOffsets.activeLeafArray);
2290     DUMP_U32(t, stateOffsets.activeLeafArray_size);
2291     DUMP_U32(t, stateOffsets.activeLeftArray);
2292     DUMP_U32(t, stateOffsets.activeLeftArray_size);
2293     DUMP_U32(t, stateOffsets.leftfixLagTable);
2294     DUMP_U32(t, stateOffsets.anchorState);
2295     DUMP_U32(t, stateOffsets.groups);
2296     DUMP_U32(t, stateOffsets.groups_size);
2297     DUMP_U32(t, stateOffsets.longLitState);
2298     DUMP_U32(t, stateOffsets.longLitState_size);
2299     DUMP_U32(t, stateOffsets.somLocation);
2300     DUMP_U32(t, stateOffsets.somValid);
2301     DUMP_U32(t, stateOffsets.somWritable);
2302     DUMP_U32(t, stateOffsets.somMultibit_size);
2303     DUMP_U32(t, stateOffsets.nfaStateBegin);
2304     DUMP_U32(t, stateOffsets.end);
2305     DUMP_U32(t, boundary.reportEodOffset);
2306     DUMP_U32(t, boundary.reportZeroOffset);
2307     DUMP_U32(t, boundary.reportZeroEodOffset);
2308     DUMP_U32(t, totalNumLiterals);
2309     DUMP_U32(t, asize);
2310     DUMP_U32(t, outfixBeginQueue);
2311     DUMP_U32(t, outfixEndQueue);
2312     DUMP_U32(t, leftfixBeginQueue);
2313     DUMP_U32(t, initMpvNfa);
2314     DUMP_U32(t, rosePrefixCount);
2315     DUMP_U32(t, activeLeftIterOffset);
2316     DUMP_U32(t, ematcherRegionSize);
2317     DUMP_U32(t, somRevCount);
2318     DUMP_U32(t, somRevOffsetOffset);
2319     fprintf(f, "}\n");
2320     fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine));
2321 }
2322 
2323 static
roseDumpComponents(const RoseEngine * t,bool dump_raw,const string & base)2324 void roseDumpComponents(const RoseEngine *t, bool dump_raw,
2325                         const string &base) {
2326     dumpComponentInfo(t, base);
2327     dumpComponentInfoCsv(t, base);
2328     dumpNfas(t, dump_raw, base);
2329     dumpAnchored(t, base);
2330     dumpRevComponentInfo(t, base);
2331     dumpRevNfas(t, dump_raw, base);
2332 }
2333 
2334 static
roseDumpPrograms(const vector<LitFragment> & fragments,const RoseEngine * t,const string & base)2335 void roseDumpPrograms(const vector<LitFragment> &fragments, const RoseEngine *t,
2336                       const string &base) {
2337     dumpRoseLitPrograms(fragments, t, base + "/rose_lit_programs.txt");
2338     dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt");
2339     dumpRoseFlushCombPrograms(t, base + "/rose_flush_comb_programs.txt");
2340     dumpRoseLastFlushCombPrograms(t,
2341             base + "/rose_last_flush_comb_programs.txt");
2342     dumpRoseReportPrograms(t, base + "/rose_report_programs.txt");
2343     dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt");
2344     dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt");
2345 }
2346 
2347 static
roseDumpLiteralMatchers(const RoseEngine * t,const string & base)2348 void roseDumpLiteralMatchers(const RoseEngine *t, const string &base) {
2349     if (const HWLM *hwlm = getFloatingMatcher(t)) {
2350         hwlmGenerateDumpFiles(hwlm, base + "/lit_table_floating");
2351     }
2352 
2353     if (const HWLM *hwlm = getDelayRebuildMatcher(t)) {
2354         hwlmGenerateDumpFiles(hwlm, base + "/lit_table_delay_rebuild");
2355     }
2356 
2357     if (const HWLM *hwlm = getEodMatcher(t)) {
2358         hwlmGenerateDumpFiles(hwlm, base + "/lit_table_eod");
2359     }
2360 
2361     if (const HWLM *hwlm = getSmallBlockMatcher(t)) {
2362         hwlmGenerateDumpFiles(hwlm, base + "/lit_table_small_block");
2363     }
2364 }
2365 
dumpRose(const RoseBuildImpl & build,const vector<LitFragment> & fragments,const map<left_id,u32> & leftfix_queue_map,const map<suffix_id,u32> & suffix_queue_map,const RoseEngine * t)2366 void dumpRose(const RoseBuildImpl &build, const vector<LitFragment> &fragments,
2367               const map<left_id, u32> &leftfix_queue_map,
2368               const map<suffix_id, u32> &suffix_queue_map,
2369               const RoseEngine *t) {
2370     const Grey &grey = build.cc.grey;
2371 
2372     if (!grey.dumpFlags) {
2373         return;
2374     }
2375 
2376     StdioFile f(grey.dumpPath + "/rose.txt", "w");
2377 
2378     if (!t) {
2379         fprintf(f, "<< no rose >>\n");
2380         return;
2381     }
2382 
2383     // Dump Rose table info
2384     roseDumpText(t, f);
2385 
2386     roseDumpComponents(t, false, grey.dumpPath);
2387     roseDumpPrograms(fragments, t, grey.dumpPath);
2388     roseDumpLiteralMatchers(t, grey.dumpPath);
2389 
2390     // Graph.
2391     dumpRoseGraph(build, t, fragments, leftfix_queue_map, suffix_queue_map,
2392                   "rose.dot");
2393 
2394     // Literals
2395     dumpRoseLiterals(build, fragments, grey);
2396 
2397     f = StdioFile(grey.dumpPath + "/rose_struct.txt", "w");
2398     roseDumpStructRaw(t, f);
2399 }
2400 
2401 } // namespace ue2
2402