1 /*
2  * Copyright (c) 2016-2018, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "config.h"
30 
31 #include "ng_violet.h"
32 
33 #include "grey.h"
34 #include "ng_depth.h"
35 #include "ng_dominators.h"
36 #include "ng_dump.h"
37 #include "ng_equivalence.h"
38 #include "ng_holder.h"
39 #include "ng_is_equal.h"
40 #include "ng_literal_analysis.h"
41 #include "ng_limex.h"
42 #include "ng_mcclellan.h"
43 #include "ng_netflow.h"
44 #include "ng_prune.h"
45 #include "ng_redundancy.h"
46 #include "ng_region.h"
47 #include "ng_reports.h"
48 #include "ng_split.h"
49 #include "ng_util.h"
50 #include "ng_width.h"
51 #include "nfa/rdfa.h"
52 #include "rose/rose_build.h"
53 #include "rose/rose_build_util.h"
54 #include "rose/rose_in_dump.h"
55 #include "rose/rose_in_graph.h"
56 #include "rose/rose_in_util.h"
57 #include "util/compare.h"
58 #include "util/compile_context.h"
59 #include "util/container.h"
60 #include "util/flat_containers.h"
61 #include "util/graph.h"
62 #include "util/graph_range.h"
63 #include "util/graph_small_color_map.h"
64 #include "util/insertion_ordered.h"
65 #include "util/make_unique.h"
66 #include "util/order_check.h"
67 #include "util/target_info.h"
68 #include "util/ue2string.h"
69 
70 #include <set>
71 #include <utility>
72 #include <vector>
73 #include <boost/dynamic_bitset.hpp>
74 #include <boost/range/adaptor/map.hpp>
75 
76 #define STAGE_DEBUG_PRINTF DEBUG_PRINTF
77 
78 using namespace std;
79 using boost::adaptors::map_values;
80 
81 namespace ue2 {
82 
83 /* createsAnchoredLHS() is conservative as the depths take into account
84  * back edges that come from beyond the split point and would be missing after
85  * the graph is split. */
86 static
createsAnchoredLHS(const NGHolder & g,const vector<NFAVertex> & vv,const vector<NFAVertexDepth> & depths,const Grey & grey,depth max_depth=depth::infinity ())87 bool createsAnchoredLHS(const NGHolder &g, const vector<NFAVertex> &vv,
88                         const vector<NFAVertexDepth> &depths,
89                         const Grey &grey, depth max_depth = depth::infinity()) {
90     max_depth = min(max_depth, depth(grey.maxAnchoredRegion));
91 
92     for (auto v : vv) {
93         /* avoid issues of self loops blowing out depths:
94          *     look at preds, add 1 */
95         for (auto u : inv_adjacent_vertices_range(v, g)) {
96             if (u == v) {
97                 continue;
98             }
99 
100             u32 idx = g[u].index;
101             assert(idx < depths.size());
102             if (maxDistFromStartOfData(depths.at(idx)) >= max_depth) {
103                 return false;
104             }
105         }
106     }
107     return true;
108 }
109 
110 /* createsTransientLHS() is conservative as the depths take into account
111  * back edges that come from beyond the split point and would be missing after
112  * the graph is split. */
113 static
createsTransientLHS(const NGHolder & g,const vector<NFAVertex> & vv,const vector<NFAVertexDepth> & depths,const Grey & grey)114 bool createsTransientLHS(const NGHolder &g, const vector<NFAVertex> &vv,
115                          const vector<NFAVertexDepth> &depths,
116                          const Grey &grey) {
117     const depth max_depth(grey.maxHistoryAvailable);
118 
119     for (auto v : vv) {
120         /* avoid issues of self loops blowing out depths:
121          *     look at preds, add 1 */
122         for (auto u : inv_adjacent_vertices_range(v, g)) {
123             if (u == v) {
124                 continue;
125             }
126 
127             u32 idx = g[u].index;
128             assert(idx < depths.size());
129             if (maxDistFromInit(depths.at(idx)) >= max_depth) {
130                 return false;
131             }
132         }
133     }
134     return true;
135 }
136 
137 /**
138  * Counts the number of vertices that are reachable from the set of sources
139  * given.
140  */
141 static
count_reachable(const NGHolder & g,const vector<NFAVertex> & sources,small_color_map<decltype(get (vertex_index,g))> & color_map)142 size_t count_reachable(const NGHolder &g, const vector<NFAVertex> &sources,
143                 small_color_map<decltype(get(vertex_index, g))> &color_map) {
144     auto null_visitor = boost::make_dfs_visitor(boost::null_visitor());
145     color_map.fill(small_color::white);
146 
147     for (auto v : sources) {
148         boost::depth_first_visit(g, v, null_visitor, color_map);
149     }
150 
151     return color_map.count(small_color::black);
152 }
153 
154 static
shorter_than(const set<ue2_literal> & s,size_t limit)155 size_t shorter_than(const set<ue2_literal> &s, size_t limit) {
156     return count_if(s.begin(), s.end(),
157                     [&](const ue2_literal &a) { return a.length() < limit; });
158 }
159 
160 static
min_len(const set<ue2_literal> & s)161 u32 min_len(const set<ue2_literal> &s) {
162     u32 rv = ~0U;
163 
164     for (const auto &lit : s) {
165         rv = min(rv, (u32)lit.length());
166     }
167 
168     return rv;
169 }
170 
171 static
min_period(const set<ue2_literal> & s)172 u32 min_period(const set<ue2_literal> &s) {
173     u32 rv = ~0U;
174 
175     for (const auto &lit : s) {
176         rv = min(rv, (u32)minStringPeriod(lit));
177     }
178     DEBUG_PRINTF("min period %u\n", rv);
179     return rv;
180 }
181 
182 namespace {
183 /**
184  * Information on a cut: vertices and literals.
185  */
186 struct VertLitInfo {
VertLitInfoue2::__anon1d8162c80211::VertLitInfo187     VertLitInfo() {}
VertLitInfoue2::__anon1d8162c80211::VertLitInfo188     VertLitInfo(NFAVertex v, const set<ue2_literal> &litlit, bool c_anch,
189                 bool c_tran = false)
190         : vv(vector<NFAVertex>(1, v)), lit(litlit), creates_anchored(c_anch),
191           creates_transient(c_tran) {}
VertLitInfoue2::__anon1d8162c80211::VertLitInfo192     VertLitInfo(const vector<NFAVertex> &vv_in, const set<ue2_literal> &lit_in,
193                 bool c_anch)
194         : vv(vv_in), lit(lit_in), creates_anchored(c_anch) {}
195     vector<NFAVertex> vv;
196     set<ue2_literal> lit;
197 
198     bool creates_anchored = false;
199     bool creates_transient = false;
200     double split_ratio = 0;
201 };
202 
203 #define LAST_CHANCE_STRONG_LEN 1
204 
205 /**
206  * \brief Comparator class for comparing different literal cuts.
207  */
208 class LitComparator {
209 public:
LitComparator(const NGHolder & g_in,bool sa,bool st,bool lc)210     LitComparator(const NGHolder &g_in, bool sa, bool st, bool lc)
211         : g(g_in), seeking_anchored(sa), seeking_transient(st),
212           last_chance(lc) {}
operator ()(const unique_ptr<VertLitInfo> & a,const unique_ptr<VertLitInfo> & b) const213     bool operator()(const unique_ptr<VertLitInfo> &a,
214                     const unique_ptr<VertLitInfo> &b) const {
215         assert(a && b);
216 
217         if (seeking_anchored) {
218             if (a->creates_anchored != b->creates_anchored) {
219                 return a->creates_anchored < b->creates_anchored;
220             }
221         }
222 
223         if (seeking_transient) {
224             if (a->creates_transient != b->creates_transient) {
225                 return a->creates_transient < b->creates_transient;
226             }
227         }
228 
229         if (last_chance
230             && min_len(a->lit) > LAST_CHANCE_STRONG_LEN
231             && min_len(b->lit) > LAST_CHANCE_STRONG_LEN) {
232             DEBUG_PRINTF("using split ratio %g , %g\n", a->split_ratio,
233                           b->split_ratio);
234             return a->split_ratio < b->split_ratio;
235         }
236 
237         u64a score_a = scoreSet(a->lit);
238         u64a score_b = scoreSet(b->lit);
239 
240         if (score_a != score_b) {
241             return score_a > score_b;
242         }
243 
244         /* vertices should only be in one candidate cut */
245         assert(a->vv == b->vv || a->vv.front() != b->vv.front());
246         return g[a->vv.front()].index > g[b->vv.front()].index;
247     }
248 
249 private:
250     const NGHolder &g; /**< graph on which cuts are found */
251 
252     bool seeking_anchored;
253     bool seeking_transient;
254     bool last_chance;
255 };
256 }
257 
258 #define MIN_ANCHORED_LEN 2
259 #define MIN_ANCHORED_DESPERATE_LEN 1
260 
261 /* anchored here means that the cut creates a 'usefully' anchored LHS */
262 static
validateRoseLiteralSetQuality(const set<ue2_literal> & s,u64a score,bool anchored,u32 min_allowed_floating_len,bool desperation,bool last_chance)263 bool validateRoseLiteralSetQuality(const set<ue2_literal> &s, u64a score,
264                                    bool anchored, u32 min_allowed_floating_len,
265                                    bool desperation, bool last_chance) {
266     u32 min_allowed_len = anchored ? MIN_ANCHORED_LEN
267                                    : min_allowed_floating_len;
268     if (anchored && last_chance) {
269         min_allowed_len = MIN_ANCHORED_DESPERATE_LEN;
270     }
271     if (last_chance) {
272         desperation = true;
273     }
274 
275     DEBUG_PRINTF("validating%s set, min allowed len %u\n",
276                  anchored ? " anchored" : "", min_allowed_len);
277 
278     assert(none_of(begin(s), end(s), bad_mixed_sensitivity));
279 
280     if (score >= NO_LITERAL_AT_EDGE_SCORE) {
281         DEBUG_PRINTF("candidate is too bad %llu/%zu\n", score, s.size());
282         return false;
283     }
284 
285     assert(!s.empty());
286     if (s.empty()) {
287         DEBUG_PRINTF("candidate is too bad/something went wrong\n");
288         return false;
289     }
290 
291     u32 s_min_len = min_len(s);
292     u32 s_min_period = min_period(s);
293     size_t short_count = shorter_than(s, 5);
294 
295     DEBUG_PRINTF("cand '%s': score %llu count=%zu min_len=%u min_period=%u"
296                  " short_count=%zu desp=%d\n",
297                  dumpString(*s.begin()).c_str(), score, s.size(), s_min_len,
298                  s_min_period, short_count, (int)desperation);
299 
300     bool ok = true;
301 
302     if (s.size() > 10 /* magic number is magic */
303         || s_min_len < min_allowed_len
304         || (s_min_period <= 1 && min_allowed_len != 1)) {
305         DEBUG_PRINTF("candidate may be bad\n");
306         ok = false;
307     }
308 
309     if (!ok && desperation
310         && s.size() <= 20 /* more magic numbers are magical */
311         && (s_min_len > 5 || (s_min_len > 2 && short_count <= 10))
312         && s_min_period > 1) {
313         DEBUG_PRINTF("candidate is ok\n");
314         ok = true;
315     }
316 
317     if (!ok && desperation
318         && s.size() <= 50 /* more magic numbers are magical */
319         && s_min_len > 10
320         && s_min_period > 1) {
321         DEBUG_PRINTF("candidate is ok\n");
322         ok = true;
323     }
324 
325     if (!ok) {
326         DEBUG_PRINTF("candidate is too shitty\n");
327         return false;
328     }
329 
330     return true;
331 }
332 
333 static UNUSED
dumpRoseLiteralSet(const set<ue2_literal> & s)334 void dumpRoseLiteralSet(const set<ue2_literal> &s) {
335     for (UNUSED const auto &lit : s) {
336         DEBUG_PRINTF("    lit: %s\n", dumpString(lit).c_str());
337     }
338 }
339 
340 static
getSimpleRoseLiterals(const NGHolder & g,bool seeking_anchored,const vector<NFAVertexDepth> * depths,const set<NFAVertex> & a_dom,vector<unique_ptr<VertLitInfo>> * lits,u32 min_allowed_len,bool desperation,bool last_chance,const CompileContext & cc)341 void getSimpleRoseLiterals(const NGHolder &g, bool seeking_anchored,
342                            const vector<NFAVertexDepth> *depths,
343                            const set<NFAVertex> &a_dom,
344                            vector<unique_ptr<VertLitInfo>> *lits,
345                            u32 min_allowed_len, bool desperation,
346                            bool last_chance, const CompileContext &cc) {
347     assert(depths || !seeking_anchored);
348 
349     map<NFAVertex, u64a> scores;
350     map<NFAVertex, unique_ptr<VertLitInfo>> lit_info;
351     set<ue2_literal> s;
352 
353     for (auto v : a_dom) {
354         s = getLiteralSet(g, v, true); /* RHS will take responsibility for any
355                                           revisits to the target vertex */
356 
357         if (s.empty()) {
358             DEBUG_PRINTF("candidate is too shitty\n");
359             continue;
360         }
361 
362         DEBUG_PRINTF("|candidate raw literal set| = %zu\n", s.size());
363         dumpRoseLiteralSet(s);
364         u64a score = sanitizeAndCompressAndScore(s);
365 
366         bool anchored = false;
367         if (seeking_anchored) {
368             anchored = createsAnchoredLHS(g, {v}, *depths, cc.grey);
369         }
370 
371         if (!validateRoseLiteralSetQuality(s, score, anchored, min_allowed_len,
372                                            desperation, last_chance)) {
373             continue;
374         }
375 
376         DEBUG_PRINTF("candidate is a candidate\n");
377         scores[v] = score;
378         lit_info[v] = make_unique<VertLitInfo>(v, s, anchored);
379     }
380 
381     /* try to filter out cases where appending some characters produces worse
382      * literals. Only bother to look back one byte, TODO make better */
383     for (auto u : a_dom) {
384         if (out_degree(u, g) != 1 || !scores[u]) {
385             continue;
386         }
387         NFAVertex v = *adjacent_vertices(u, g).first;
388         if (contains(scores, v) && scores[v] >= scores[u]) {
389             DEBUG_PRINTF("killing off v as score %llu >= %llu\n",
390                          scores[v], scores[u]);
391             lit_info.erase(v);
392         }
393     }
394 
395     lits->reserve(lit_info.size());
396     for (auto &m : lit_info) {
397         lits->push_back(move(m.second));
398     }
399     DEBUG_PRINTF("%zu candidate literal sets\n", lits->size());
400 }
401 
402 static
getRegionRoseLiterals(const NGHolder & g,bool seeking_anchored,const vector<NFAVertexDepth> * depths,const set<NFAVertex> & bad,const set<NFAVertex> * allowed,vector<unique_ptr<VertLitInfo>> * lits,u32 min_allowed_len,bool desperation,bool last_chance,const CompileContext & cc)403 void getRegionRoseLiterals(const NGHolder &g, bool seeking_anchored,
404                            const vector<NFAVertexDepth> *depths,
405                            const set<NFAVertex> &bad,
406                            const set<NFAVertex> *allowed,
407                            vector<unique_ptr<VertLitInfo>> *lits,
408                            u32 min_allowed_len, bool desperation,
409                            bool last_chance, const CompileContext &cc) {
410     /* This allows us to get more places to split the graph as we are not
411        limited to points where there is a single vertex to split at. */
412 
413     assert(depths || !seeking_anchored);
414 
415     /* TODO: operate over 'proto-regions' which ignore back edges */
416     auto regions = assignRegions(g);
417 
418     set<u32> mand, optional;
419     map<u32, vector<NFAVertex> > exits;
420 
421     for (auto v : vertices_range(g)) {
422         u32 region = regions[v];
423         if (is_any_start(v, g) || region == 0) {
424             continue;
425         }
426 
427         if (is_any_accept(v, g)) {
428             continue;
429         }
430 
431         if (!generates_callbacks(g) && is_match_vertex(v, g)) {
432             /* we cannot leave a completely vacuous infix */
433             continue;
434         }
435 
436         if (isRegionExit(g, v, regions)) {
437             exits[region].push_back(v);
438         }
439 
440         if (isRegionEntry(g, v, regions)) {
441             // Determine whether this region is mandatory or optional. We only
442             // need to do this check for the first entry vertex we encounter
443             // for this region.
444             if (!contains(mand, region) && !contains(optional, region)) {
445                 if (isOptionalRegion(g, v, regions)) {
446                     optional.insert(region);
447                 } else {
448                     mand.insert(region);
449                 }
450             }
451         }
452     }
453 
454     for (const auto &m : exits) {
455         if (false) {
456         next_cand:
457             continue;
458         }
459 
460         const u32 region = m.first;
461         const vector<NFAVertex> &vv = m.second;
462         assert(!vv.empty());
463 
464         if (!contains(mand, region)) {
465             continue;
466         }
467 
468         for (auto v : vv) {
469              /* if an exit is in bad, the region is already handled well
470               * by getSimpleRoseLiterals or is otherwise bad */
471             if (contains(bad, v)) {
472                 goto next_cand;
473             }
474             /* if we are only allowed to consider some vertices, v must be in
475                the list; */
476             if (allowed && !contains(*allowed, v)) {
477                 goto next_cand;
478             }
479         }
480 
481         /* the final region may not have a neat exit. validate that all exits
482          * have an edge to each accept or none do */
483         bool edge_to_a = edge(vv[0], g.accept, g).second;
484         bool edge_to_aeod = edge(vv[0], g.acceptEod, g).second;
485         const auto &reports = g[vv[0]].reports;
486         for (auto v : vv) {
487             if (edge_to_a != edge(v, g.accept, g).second) {
488                 goto next_cand;
489             }
490 
491             if (edge_to_aeod != edge(v, g.acceptEod, g).second) {
492                 goto next_cand;
493             }
494 
495             if (g[v].reports != reports) {
496                 goto next_cand;
497             }
498         }
499 
500         DEBUG_PRINTF("inspecting region %u\n", region);
501         set<ue2_literal> s;
502         for (auto v : vv) {
503             DEBUG_PRINTF("   exit vertex: %zu\n", g[v].index);
504             /* Note: RHS can not be depended on to take all subsequent revisits
505              * to this vertex */
506             set<ue2_literal> ss = getLiteralSet(g, v, false);
507             if (ss.empty()) {
508                 DEBUG_PRINTF("candidate is too shitty\n");
509                 goto next_cand;
510             }
511             insert(&s, ss);
512         }
513 
514         assert(!s.empty());
515 
516         DEBUG_PRINTF("|candidate raw literal set| = %zu\n", s.size());
517         dumpRoseLiteralSet(s);
518         u64a score = sanitizeAndCompressAndScore(s);
519 
520         DEBUG_PRINTF("|candidate literal set| = %zu\n", s.size());
521         dumpRoseLiteralSet(s);
522 
523         bool anchored = false;
524         if (seeking_anchored) {
525             anchored = createsAnchoredLHS(g, vv, *depths, cc.grey);
526         }
527 
528         if (!validateRoseLiteralSetQuality(s, score, anchored, min_allowed_len,
529                                            desperation, last_chance)) {
530             goto next_cand;
531         }
532 
533         DEBUG_PRINTF("candidate is a candidate\n");
534         lits->push_back(make_unique<VertLitInfo>(vv, s, anchored));
535     }
536 }
537 
538 static
filterCandPivots(const NGHolder & g,const set<NFAVertex> & cand_raw,set<NFAVertex> * out)539 void filterCandPivots(const NGHolder &g, const set<NFAVertex> &cand_raw,
540                       set<NFAVertex> *out) {
541     for (auto u : cand_raw) {
542         const CharReach &u_cr = g[u].char_reach;
543         if (u_cr.count() > 40) {
544             continue; /* too wide to be plausible */
545         }
546 
547         if (u_cr.count() > 2) {
548             /* include u as a candidate as successor may have backed away from
549              * expanding through it */
550             out->insert(u);
551             continue;
552         }
553 
554         NFAVertex v = getSoleDestVertex(g, u);
555         if (v && in_degree(v, g) == 1 && out_degree(u, g) == 1) {
556             const CharReach &v_cr = g[v].char_reach;
557             if (v_cr.count() == 1 || v_cr.isCaselessChar()) {
558                 continue; /* v will always generate better literals */
559             }
560         }
561 
562         out->insert(u);
563     }
564 }
565 
566 /* cand_raw is the candidate set before filtering points which are clearly
567  * a bad idea. */
568 static
getCandidatePivots(const NGHolder & g,set<NFAVertex> * cand,set<NFAVertex> * cand_raw)569 void getCandidatePivots(const NGHolder &g, set<NFAVertex> *cand,
570                         set<NFAVertex> *cand_raw) {
571     auto dominators = findDominators(g);
572 
573     set<NFAVertex> accepts;
574 
575     for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
576         if (is_special(v, g)) {
577             continue;
578         }
579         accepts.insert(v);
580     }
581     for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
582         if (is_special(v, g)) {
583             continue;
584         }
585         accepts.insert(v);
586     }
587 
588     assert(!accepts.empty());
589 
590     vector<NFAVertex> dom_trace;
591     auto ait = accepts.begin();
592     assert(ait != accepts.end());
593     NFAVertex curr = *ait;
594     while (curr && !is_special(curr, g)) {
595         dom_trace.push_back(curr);
596         curr = dominators[curr];
597     }
598     reverse(dom_trace.begin(), dom_trace.end());
599     for (++ait; ait != accepts.end(); ++ait) {
600         curr = *ait;
601         vector<NFAVertex> dom_trace2;
602         while (curr && !is_special(curr, g)) {
603             dom_trace2.push_back(curr);
604             curr = dominators[curr];
605         }
606         reverse(dom_trace2.begin(), dom_trace2.end());
607         auto dti = dom_trace.begin(), dtie = dom_trace.end();
608         auto dtj = dom_trace2.begin(), dtje = dom_trace2.end();
609         while (dti != dtie && dtj != dtje && *dti == *dtj) {
610             ++dti;
611             ++dtj;
612         }
613         dom_trace.erase(dti, dtie);
614     }
615 
616     cand_raw->insert(dom_trace.begin(), dom_trace.end());
617 
618     filterCandPivots(g, *cand_raw, cand);
619 }
620 
621 static
findBestSplit(const NGHolder & g,const vector<NFAVertexDepth> * depths,bool for_prefix,u32 min_len,const set<NFAVertex> * allowed_cand,const set<NFAVertex> * disallowed_cand,bool last_chance,const CompileContext & cc)622 unique_ptr<VertLitInfo> findBestSplit(const NGHolder &g,
623                                       const vector<NFAVertexDepth> *depths,
624                                       bool for_prefix, u32 min_len,
625                                       const set<NFAVertex> *allowed_cand,
626                                       const set<NFAVertex> *disallowed_cand,
627                                       bool last_chance,
628                                       const CompileContext &cc) {
629     assert(!for_prefix || depths);
630 
631     /* look for a single simple split point */
632     set<NFAVertex> cand;
633     set<NFAVertex> cand_raw;
634 
635     getCandidatePivots(g, &cand, &cand_raw);
636 
637     if (allowed_cand) {
638         set<NFAVertex> cand2;
639         set<NFAVertex> cand2_raw;
640         set_intersection(allowed_cand->begin(), allowed_cand->end(),
641                          cand.begin(), cand.end(),
642                          inserter(cand2, cand2.begin()));
643 
644         set_intersection(allowed_cand->begin(), allowed_cand->end(),
645                          cand_raw.begin(), cand_raw.end(),
646                          inserter(cand2_raw, cand2_raw.begin()));
647 
648         cand = std::move(cand2);
649         cand_raw = std::move(cand2_raw);
650     }
651     if (disallowed_cand) {
652         DEBUG_PRINTF("%zu disallowed candidates\n", disallowed_cand->size());
653         DEBUG_PRINTF("|old cand| = %zu\n", cand.size());
654         erase_all(&cand, *disallowed_cand);
655         insert(&cand_raw, *disallowed_cand);
656     }
657 
658     if (!generates_callbacks(g)) {
659         /* not output exposed so must leave some RHS */
660         for (NFAVertex v : inv_adjacent_vertices_range(g.accept, g)) {
661             cand.erase(v);
662             cand_raw.erase(v);
663         }
664 
665         for (NFAVertex v : inv_adjacent_vertices_range(g.acceptEod, g)) {
666             cand.erase(v);
667             cand_raw.erase(v);
668         }
669     }
670 
671     DEBUG_PRINTF("|cand| = %zu\n", cand.size());
672 
673     bool seeking_anchored = for_prefix;
674     bool seeking_transient = for_prefix;
675 
676     bool desperation = for_prefix && cc.streaming;
677 
678     vector<unique_ptr<VertLitInfo>> lits; /**< sorted list of potential cuts */
679 
680     getSimpleRoseLiterals(g, seeking_anchored, depths, cand, &lits, min_len,
681                           desperation, last_chance, cc);
682     getRegionRoseLiterals(g, seeking_anchored, depths, cand_raw, allowed_cand,
683                           &lits, min_len, desperation, last_chance, cc);
684 
685     if (lits.empty()) {
686         DEBUG_PRINTF("no literals found\n");
687         return nullptr;
688     }
689 
690     if (seeking_transient) {
691         for (auto &a : lits) {
692             a->creates_transient
693                 = createsTransientLHS(g, a->vv, *depths, cc.grey);
694         }
695     }
696 
697     if (last_chance) {
698         const size_t num_verts = num_vertices(g);
699         auto color_map = make_small_color_map(g);
700         for (auto &a : lits) {
701             size_t num_reachable = count_reachable(g, a->vv, color_map);
702             double ratio = (double)num_reachable / (double)num_verts;
703             a->split_ratio = ratio > 0.5 ? 1 - ratio : ratio;
704         }
705     }
706 
707     auto cmp = LitComparator(g, seeking_anchored, seeking_transient,
708                              last_chance);
709 
710     unique_ptr<VertLitInfo> best = move(lits.back());
711     lits.pop_back();
712     while (!lits.empty()) {
713         if (cmp(best, lits.back())) {
714             best = move(lits.back());
715         }
716         lits.pop_back();
717     }
718 
719     DEBUG_PRINTF("best is '%s' %zu a%d t%d\n",
720         dumpString(*best->lit.begin()).c_str(),
721         g[best->vv.front()].index,
722         depths ? (int)createsAnchoredLHS(g, best->vv, *depths, cc.grey) : 0,
723         depths ? (int)createsTransientLHS(g, best->vv, *depths, cc.grey) : 0);
724 
725     return best;
726 }
727 
728 static
poisonFromSuccessor(const NGHolder & h,const ue2_literal & succ,bool overhang_ok,flat_set<NFAEdge> & bad)729 void poisonFromSuccessor(const NGHolder &h, const ue2_literal &succ,
730                          bool overhang_ok, flat_set<NFAEdge> &bad) {
731     DEBUG_PRINTF("poisoning holder of size %zu, succ len %zu\n",
732                  num_vertices(h), succ.length());
733 
734     using EdgeSet = boost::dynamic_bitset<>;
735 
736     const size_t edge_count = num_edges(h);
737     EdgeSet bad_edges(edge_count);
738 
739     unordered_map<NFAVertex, EdgeSet> curr;
740     for (const auto &e : in_edges_range(h.accept, h)) {
741         auto &path_set = curr[source(e, h)];
742         if (path_set.empty()) {
743             path_set.resize(edge_count);
744         }
745         path_set.set(h[e].index);
746     }
747 
748     unordered_map<NFAVertex, EdgeSet> next;
749     for (auto it = succ.rbegin(); it != succ.rend(); ++it) {
750         for (const auto &path : curr) {
751             NFAVertex u = path.first;
752             const auto &path_set = path.second;
753             if (u == h.start && overhang_ok) {
754                 DEBUG_PRINTF("poisoning early %zu [overhang]\n",
755                              path_set.count());
756                 bad_edges |= path_set;
757                 continue;
758             }
759             if (overlaps(h[u].char_reach, *it)) {
760                 for (const auto &e : in_edges_range(u, h)) {
761                     auto &new_path_set = next[source(e, h)];
762                     if (new_path_set.empty()) {
763                         new_path_set.resize(edge_count);
764                     }
765                     new_path_set |= path_set;
766                     new_path_set.set(h[e].index);
767                 }
768             }
769         }
770         DEBUG_PRINTF("succ char matches at %zu paths\n", next.size());
771         assert(overhang_ok || !curr.empty());
772         swap(curr, next);
773         next.clear();
774     }
775 
776     assert(overhang_ok || !curr.empty());
777     for (const auto &path : curr) {
778         bad_edges |= path.second;
779         DEBUG_PRINTF("poisoning %zu vertices\n", path.second.count());
780     }
781 
782     for (const auto &e : edges_range(h)) {
783         if (bad_edges.test(h[e].index)) {
784             bad.insert(e);
785         }
786     }
787 }
788 
789 static
poisonForGoodPrefix(const NGHolder & h,const vector<NFAVertexDepth> & depths,flat_set<NFAEdge> & bad,const Grey & grey)790 void poisonForGoodPrefix(const NGHolder &h,
791                          const vector<NFAVertexDepth> &depths,
792                          flat_set<NFAEdge> &bad, const Grey &grey) {
793     for (const auto &v : vertices_range(h)) {
794         if (!createsAnchoredLHS(h, {v}, depths, grey)
795             && !createsTransientLHS(h, {v}, depths, grey)) {
796             insert(&bad, in_edges_range(v, h));
797         }
798     }
799 }
800 
801 static UNUSED
is_any_accept_type(RoseInVertexType t)802 bool is_any_accept_type(RoseInVertexType t) {
803     return t == RIV_ACCEPT || t == RIV_ACCEPT_EOD;
804 }
805 
806 static
poisonEdges(const NGHolder & h,const vector<NFAVertexDepth> * depths,const RoseInGraph & vg,const vector<RoseInEdge> & ee,bool for_prefix,const Grey & grey)807 flat_set<NFAEdge> poisonEdges(const NGHolder &h,
808                          const vector<NFAVertexDepth> *depths,
809                          const RoseInGraph &vg, const vector<RoseInEdge> &ee,
810                          bool for_prefix, const Grey &grey) {
811     DEBUG_PRINTF("poisoning edges %zu successor edges\n", ee.size());
812 
813     /* poison edges covered by successor literal */
814 
815     set<pair<ue2_literal, bool> > succs;
816     for (const RoseInEdge &ve : ee) {
817         if (vg[target(ve, vg)].type != RIV_LITERAL) {
818             /* nothing to poison in suffixes/outfixes */
819             assert(generates_callbacks(h));
820             assert(is_any_accept_type(vg[target(ve, vg)].type));
821             continue;
822         }
823         succs.insert({vg[target(ve, vg)].s,
824                     vg[source(ve, vg)].type == RIV_LITERAL});
825 
826     }
827 
828     DEBUG_PRINTF("poisoning edges %zu successor literals\n", succs.size());
829 
830     flat_set<NFAEdge> bad;
831     for (const auto &p : succs) {
832         poisonFromSuccessor(h, p.first, p.second, bad);
833     }
834 
835     /* poison edges which don't significantly improve a prefix */
836 
837     if (for_prefix) {
838         poisonForGoodPrefix(h, *depths, bad, grey);
839     }
840 
841     return bad;
842 }
843 
844 static
poisonVertices(const NGHolder & h,const RoseInGraph & vg,const vector<RoseInEdge> & ee,const Grey & grey)845 set<NFAVertex> poisonVertices(const NGHolder &h, const RoseInGraph &vg,
846                               const vector<RoseInEdge> &ee, const Grey &grey) {
847     flat_set<NFAEdge> bad_edges = poisonEdges(h, nullptr, vg, ee, false, grey);
848     set<NFAVertex> bad_vertices;
849     for (const NFAEdge &e : bad_edges) {
850         bad_vertices.insert(target(e, h));
851         DEBUG_PRINTF("bad: %zu->%zu\n", h[source(e, h)].index,
852                      h[target(e, h)].index);
853     }
854 
855     return bad_vertices;
856 }
857 
858 static
findBestNormalSplit(const NGHolder & g,const RoseInGraph & vg,const vector<RoseInEdge> & ee,const CompileContext & cc)859 unique_ptr<VertLitInfo> findBestNormalSplit(const NGHolder &g,
860                                             const RoseInGraph &vg,
861                                             const vector<RoseInEdge> &ee,
862                                             const CompileContext &cc) {
863     assert(g.kind == NFA_OUTFIX || g.kind == NFA_INFIX || g.kind == NFA_SUFFIX);
864     set<NFAVertex> bad_vertices = poisonVertices(g, vg, ee, cc.grey);
865 
866     return findBestSplit(g, nullptr, false, cc.grey.minRoseLiteralLength,
867                          nullptr, &bad_vertices, false, cc);
868 }
869 
870 static
findBestLastChanceSplit(const NGHolder & g,const RoseInGraph & vg,const vector<RoseInEdge> & ee,const CompileContext & cc)871 unique_ptr<VertLitInfo> findBestLastChanceSplit(const NGHolder &g,
872                                                 const RoseInGraph &vg,
873                                                 const vector<RoseInEdge> &ee,
874                                                 const CompileContext &cc) {
875     assert(g.kind == NFA_OUTFIX || g.kind == NFA_INFIX || g.kind == NFA_SUFFIX);
876     set<NFAVertex> bad_vertices = poisonVertices(g, vg, ee, cc.grey);
877 
878     return findBestSplit(g, nullptr, false, cc.grey.minRoseLiteralLength,
879                          nullptr, &bad_vertices, true, cc);
880 }
881 
882 static
findSimplePrefixSplit(const NGHolder & g,const CompileContext & cc)883 unique_ptr<VertLitInfo> findSimplePrefixSplit(const NGHolder &g,
884                                               const CompileContext &cc) {
885     DEBUG_PRINTF("looking for simple prefix split\n");
886     bool anchored = !proper_out_degree(g.startDs, g);
887     NFAVertex u = anchored ? g.start : g.startDs;
888 
889     if (out_degree(u, g) != 2) { /* startDs + succ */
890         return nullptr;
891     }
892 
893     NFAVertex v = NGHolder::null_vertex();
894     for (NFAVertex t : adjacent_vertices_range(u, g)) {
895         if (t != g.startDs) {
896             assert(!v);
897             v = t;
898         }
899     }
900     assert(v);
901 
902     if (!anchored) {
903         if (out_degree(g.start, g) > 2) {
904             return nullptr;
905         }
906         if (out_degree(g.start, g) == 2 && !edge(g.start, v, g).second) {
907             return nullptr;
908         }
909     }
910 
911     NFAVertex best_v = NGHolder::null_vertex();
912     ue2_literal best_lit;
913 
914     u32 limit = cc.grey.maxHistoryAvailable;
915     if (anchored) {
916         LIMIT_TO_AT_MOST(&limit, cc.grey.maxAnchoredRegion);
917     }
918 
919     ue2_literal curr_lit;
920     for (u32 i = 0; i < limit; i++) {
921         const auto &v_cr = g[v].char_reach;
922         if (v_cr.count() == 1 || v_cr.isCaselessChar()) {
923             curr_lit.push_back(v_cr.find_first(), v_cr.isCaselessChar());
924         } else {
925             curr_lit.clear();
926         }
927 
928         if (curr_lit.length() > best_lit.length()) {
929             best_lit = curr_lit;
930             best_v = v;
931         }
932 
933         if (out_degree(v, g) != 1) {
934             break;
935         }
936         v = *adjacent_vertices(v, g).first;
937     }
938 
939     if (best_lit.length() < cc.grey.minRoseLiteralLength) {
940         return nullptr;
941     }
942 
943     set<ue2_literal> best_lit_set({best_lit});
944     if (bad_mixed_sensitivity(best_lit)) {
945         sanitizeAndCompressAndScore(best_lit_set);
946     }
947 
948     return ue2::make_unique<VertLitInfo>(best_v, best_lit_set, anchored, true);
949 }
950 
951 static
findBestPrefixSplit(const NGHolder & g,const vector<NFAVertexDepth> & depths,const RoseInGraph & vg,const vector<RoseInEdge> & ee,bool last_chance,const CompileContext & cc)952 unique_ptr<VertLitInfo> findBestPrefixSplit(const NGHolder &g,
953                                         const vector<NFAVertexDepth> &depths,
954                                         const RoseInGraph &vg,
955                                         const vector<RoseInEdge> &ee,
956                                         bool last_chance,
957                                         const CompileContext &cc) {
958     assert(g.kind == NFA_PREFIX || g.kind == NFA_OUTFIX);
959     set<NFAVertex> bad_vertices = poisonVertices(g, vg, ee, cc.grey);
960     auto rv = findBestSplit(g, &depths, true, cc.grey.minRoseLiteralLength,
961                             nullptr, &bad_vertices, last_chance, cc);
962 
963     /* large back edges may prevent us identifying anchored or transient cases
964      * properly - use a simple walk instead */
965     if (!rv || !(rv->creates_transient || rv->creates_anchored)) {
966         auto rv2 = findSimplePrefixSplit(g, cc);
967         if (rv2) {
968             return rv2;
969         }
970     }
971 
972     return rv;
973 }
974 
975 static
findBestCleanSplit(const NGHolder & g,const CompileContext & cc)976 unique_ptr<VertLitInfo> findBestCleanSplit(const NGHolder &g,
977                                            const CompileContext &cc) {
978     assert(g.kind != NFA_PREFIX);
979     set<NFAVertex> cleanSplits;
980     for (NFAVertex v : vertices_range(g)) {
981         if (!g[v].char_reach.all() || !edge(v, v, g).second) {
982             continue;
983         }
984         insert(&cleanSplits, inv_adjacent_vertices(v, g));
985         cleanSplits.erase(v);
986     }
987     cleanSplits.erase(g.start);
988     if (cleanSplits.empty()) {
989         return nullptr;
990     }
991     return findBestSplit(g, nullptr, false, cc.grey.violetEarlyCleanLiteralLen,
992                          &cleanSplits, nullptr, false, cc);
993 }
994 
995 static
can_match(const NGHolder & g,const ue2_literal & lit,bool overhang_ok)996 bool can_match(const NGHolder &g, const ue2_literal &lit, bool overhang_ok) {
997     set<NFAVertex> curr, next;
998     curr.insert(g.accept);
999 
1000     for (auto it = lit.rbegin(); it != lit.rend(); ++it) {
1001         next.clear();
1002 
1003         for (auto v : curr) {
1004             for (auto u : inv_adjacent_vertices_range(v, g)) {
1005                 if (u == g.start) {
1006                     if (overhang_ok) {
1007                         DEBUG_PRINTF("bail\n");
1008                         return true;
1009                     } else {
1010                         continue; /* it is not possible for a lhs literal to
1011                                    * overhang the start */
1012                     }
1013                 }
1014 
1015                 const CharReach &cr = g[u].char_reach;
1016                 if (!overlaps(*it, cr)) {
1017                      continue;
1018                 }
1019 
1020                 next.insert(u);
1021             }
1022         }
1023 
1024         curr.swap(next);
1025     }
1026 
1027     return !curr.empty();
1028 }
1029 
1030 static
splitRoseEdge(const NGHolder & base_graph,RoseInGraph & vg,const vector<RoseInEdge> & ee,const VertLitInfo & split)1031 bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg,
1032                    const vector<RoseInEdge> &ee, const VertLitInfo &split) {
1033     const vector<NFAVertex> &splitters = split.vv;
1034     assert(!splitters.empty());
1035 
1036     shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
1037     shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
1038 
1039     unordered_map<NFAVertex, NFAVertex> lhs_map;
1040     unordered_map<NFAVertex, NFAVertex> rhs_map;
1041 
1042     splitGraph(base_graph, splitters, lhs.get(), &lhs_map, rhs.get(), &rhs_map);
1043     DEBUG_PRINTF("split %s:%zu into %s:%zu + %s:%zu\n",
1044                  to_string(base_graph.kind).c_str(), num_vertices(base_graph),
1045                  to_string(lhs->kind).c_str(), num_vertices(*lhs),
1046                  to_string(rhs->kind).c_str(), num_vertices(*rhs));
1047 
1048     bool suffix = generates_callbacks(base_graph);
1049 
1050     if (is_triggered(base_graph)) {
1051         /* if we are already guarded, check if the split reduces the size of
1052          * the problem before continuing with the split */
1053         if (num_vertices(*lhs) >= num_vertices(base_graph)
1054             && !(suffix && isVacuous(*rhs))) {
1055             DEBUG_PRINTF("split's lhs is no smaller\n");
1056             return false;
1057         }
1058 
1059         if (num_vertices(*rhs) >= num_vertices(base_graph)) {
1060             DEBUG_PRINTF("split's rhs is no smaller\n");
1061             return false;
1062         }
1063     }
1064 
1065     bool do_accept = false;
1066     bool do_accept_eod = false;
1067     assert(rhs);
1068     if (isVacuous(*rhs) && suffix) {
1069         if (edge(rhs->start, rhs->accept, *rhs).second) {
1070             DEBUG_PRINTF("rhs has a cliche\n");
1071             do_accept = true;
1072             remove_edge(rhs->start, rhs->accept, *rhs);
1073         }
1074 
1075         if (edge(rhs->start, rhs->acceptEod, *rhs).second) {
1076             DEBUG_PRINTF("rhs has an eod cliche\n");
1077             do_accept_eod = true;
1078             remove_edge(rhs->start, rhs->acceptEod, *rhs);
1079         }
1080 
1081         renumber_edges(*rhs);
1082     }
1083 
1084     /* check if we still have a useful graph left over */
1085     bool do_norm = out_degree(rhs->start, *rhs) != 1;
1086 
1087     set<ReportID> splitter_reports;
1088     for (auto v : splitters) {
1089         insert(&splitter_reports, base_graph[v].reports);
1090     }
1091 
1092     /* find the targets of each source vertex; insertion_ordered_map used to
1093      * preserve deterministic ordering */
1094     insertion_ordered_map<RoseInVertex, vector<RoseInVertex>> images;
1095     for (const RoseInEdge &e : ee) {
1096         RoseInVertex src = source(e, vg);
1097         RoseInVertex dest = target(e, vg);
1098         images[src].push_back(dest);
1099         remove_edge(e, vg);
1100     }
1101 
1102     map<vector<RoseInVertex>, vector<RoseInVertex>> verts_by_image;
1103 
1104     for (const auto &m : images) {
1105         const auto &u = m.first;
1106         const auto &image = m.second;
1107 
1108         if (contains(verts_by_image, image)) {
1109             for (RoseInVertex v : verts_by_image[image]) {
1110                 add_edge(u, v, RoseInEdgeProps(lhs, 0U), vg);
1111             }
1112             continue;
1113         }
1114 
1115         for (const auto &lit : split.lit) {
1116             assert(!bad_mixed_sensitivity(lit));
1117 
1118             /* don't allow overhang in can_match() as literals should
1119              * correspond to the edge graph being split; overhanging the graph
1120              * would indicate a false path.*/
1121             if (!can_match(*lhs, lit, false)) {
1122                 DEBUG_PRINTF("'%s' did not match lhs\n",
1123                              escapeString(lit).c_str());
1124                 continue;
1125             }
1126 
1127             DEBUG_PRINTF("best is '%s'\n", escapeString(lit).c_str());
1128             auto v = add_vertex(RoseInVertexProps::makeLiteral(lit), vg);
1129             add_edge(u, v, RoseInEdgeProps(lhs, 0U), vg);
1130 
1131             /* work out delay later */
1132             if (do_accept) {
1133                 DEBUG_PRINTF("rhs has a cliche\n");
1134                 auto tt = add_vertex(RoseInVertexProps::makeAccept(
1135                                                          splitter_reports), vg);
1136                 add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg);
1137             }
1138 
1139             if (do_accept_eod) {
1140                 DEBUG_PRINTF("rhs has an eod cliche\n");
1141                 auto tt = add_vertex(RoseInVertexProps::makeAcceptEod(
1142                                                          splitter_reports), vg);
1143                 add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg);
1144             }
1145 
1146             if (do_norm) {
1147                 assert(out_degree(rhs->start, *rhs) > 1);
1148                 for (RoseInVertex dest : image) {
1149                     add_edge(v, dest, RoseInEdgeProps(rhs, 0U), vg);
1150                 }
1151             }
1152             verts_by_image[image].push_back(v);
1153         }
1154     }
1155 
1156     assert(hasCorrectlyNumberedVertices(*rhs));
1157     assert(hasCorrectlyNumberedEdges(*rhs));
1158     assert(isCorrectlyTopped(*rhs));
1159     assert(hasCorrectlyNumberedVertices(*lhs));
1160     assert(hasCorrectlyNumberedEdges(*lhs));
1161     assert(isCorrectlyTopped(*lhs));
1162 
1163     return true;
1164 }
1165 
1166 #define MAX_NETFLOW_CUT_WIDTH 40 /* magic number is magic */
1167 #define MAX_LEN_2_LITERALS_PER_CUT 3
1168 
1169 static
checkValidNetflowLits(NGHolder & h,const vector<u64a> & scores,const map<NFAEdge,set<ue2_literal>> & cut_lits,u32 min_allowed_length)1170 bool checkValidNetflowLits(NGHolder &h, const vector<u64a> &scores,
1171                            const map<NFAEdge, set<ue2_literal>> &cut_lits,
1172                            u32 min_allowed_length) {
1173     DEBUG_PRINTF("cut width %zu; min allowed %u\n", cut_lits.size(),
1174                  min_allowed_length);
1175     if (cut_lits.size() > MAX_NETFLOW_CUT_WIDTH) {
1176         return false;
1177     }
1178 
1179     u32 len_2_count = 0;
1180 
1181     for (const auto &cut : cut_lits) {
1182         if (scores[h[cut.first].index] >= NO_LITERAL_AT_EDGE_SCORE) {
1183             DEBUG_PRINTF("cut uses a forbidden edge\n");
1184             return false;
1185         }
1186 
1187         if (min_len(cut.second) < min_allowed_length) {
1188             DEBUG_PRINTF("cut uses a bad literal\n");
1189             return false;
1190         }
1191 
1192         for (const auto &lit : cut.second) {
1193             if (lit.length() == 2) {
1194                 len_2_count++;
1195             }
1196         }
1197     }
1198 
1199     if (len_2_count > MAX_LEN_2_LITERALS_PER_CUT) {
1200         return false;
1201     }
1202 
1203     return true;
1204 }
1205 
1206 static
splitEdgesByCut(NGHolder & h,RoseInGraph & vg,const vector<RoseInEdge> & to_cut,const vector<NFAEdge> & cut,const map<NFAEdge,set<ue2_literal>> & cut_lits)1207 void splitEdgesByCut(NGHolder &h, RoseInGraph &vg,
1208                      const vector<RoseInEdge> &to_cut,
1209                      const vector<NFAEdge> &cut,
1210                      const map<NFAEdge, set<ue2_literal>> &cut_lits) {
1211     DEBUG_PRINTF("splitting %s (%zu vertices)\n", to_string(h.kind).c_str(),
1212                  num_vertices(h));
1213 
1214     /* create literal vertices and connect preds */
1215     unordered_set<RoseInVertex> done_sources;
1216     map<RoseInVertex, vector<pair<RoseInVertex, NFAVertex>>> verts_by_source;
1217     for (const RoseInEdge &ve : to_cut) {
1218         assert(&h == &*vg[ve].graph);
1219         RoseInVertex src = source(ve, vg);
1220         if (!done_sources.insert(src).second) {
1221             continue; /* already processed */
1222         }
1223 
1224         /* iterate over cut for determinism */
1225         for (const auto &e : cut) {
1226             NFAVertex prev_v = source(e, h);
1227             NFAVertex pivot = target(e, h);
1228 
1229             DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index);
1230             unordered_map<NFAVertex, NFAVertex> temp_map;
1231             shared_ptr<NGHolder> new_lhs = make_shared<NGHolder>();
1232             splitLHS(h, pivot, new_lhs.get(), &temp_map);
1233 
1234             /* want to cut off paths to pivot from things other than the pivot -
1235              * makes a more svelte graphy */
1236             clear_in_edges(temp_map[pivot], *new_lhs);
1237             NFAEdge pivot_edge = add_edge(temp_map[prev_v], temp_map[pivot],
1238                                           *new_lhs);
1239             if (is_triggered(h) && prev_v == h.start) {
1240                 (*new_lhs)[pivot_edge].tops.insert(DEFAULT_TOP);
1241             }
1242 
1243             pruneUseless(*new_lhs, false);
1244             renumber_vertices(*new_lhs);
1245             renumber_edges(*new_lhs);
1246 
1247             DEBUG_PRINTF("    into lhs %s (%zu vertices)\n",
1248                          to_string(new_lhs->kind).c_str(),
1249                          num_vertices(*new_lhs));
1250 
1251             assert(hasCorrectlyNumberedVertices(*new_lhs));
1252             assert(hasCorrectlyNumberedEdges(*new_lhs));
1253             assert(isCorrectlyTopped(*new_lhs));
1254 
1255             const set<ue2_literal> &lits = cut_lits.at(e);
1256             for (const auto &lit : lits) {
1257                 if (!can_match(*new_lhs, lit, is_triggered(h))) {
1258                     continue;
1259                 }
1260 
1261                 RoseInVertex v
1262                     = add_vertex(RoseInVertexProps::makeLiteral(lit), vg);
1263 
1264                 /* if this is a prefix/infix an edge directly to accept should
1265                  * represent a false path as we have poisoned vertices covered
1266                  * by the literals. */
1267                 if (generates_callbacks(h)) {
1268                     if (edge(pivot, h.accept, h).second) {
1269                         DEBUG_PRINTF("adding acceptEod\n");
1270                         /* literal has a direct connection to accept */
1271                         const flat_set<ReportID> &reports = h[pivot].reports;
1272                         auto tt = add_vertex(
1273                                     RoseInVertexProps::makeAccept(reports), vg);
1274                         add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg);
1275                     }
1276 
1277                     if (edge(pivot, h.acceptEod, h).second) {
1278                         assert(generates_callbacks(h));
1279                         DEBUG_PRINTF("adding acceptEod\n");
1280                         /* literal has a direct connection to accept */
1281                         const flat_set<ReportID> &reports = h[pivot].reports;
1282                         auto tt = add_vertex(
1283                                  RoseInVertexProps::makeAcceptEod(reports), vg);
1284                         add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg);
1285                     }
1286                 }
1287 
1288                 add_edge(src, v, RoseInEdgeProps(new_lhs, 0), vg);
1289                 verts_by_source[src].push_back({v, pivot});
1290             }
1291         }
1292     }
1293 
1294     /* wire the literal vertices up to successors */
1295     map<vector<NFAVertex>, shared_ptr<NGHolder> > done_rhs;
1296     for (const RoseInEdge &ve : to_cut) {
1297         RoseInVertex src = source(ve, vg);
1298         RoseInVertex dest = target(ve, vg);
1299 
1300         /* iterate over cut for determinism */
1301         for (const auto &elem : verts_by_source[src]) {
1302             NFAVertex pivot = elem.second;
1303             RoseInVertex v = elem.first;
1304 
1305             vector<NFAVertex> adj;
1306             insert(&adj, adj.end(), adjacent_vertices(pivot, h));
1307             /* we can ignore presence of accept, accepteod in adj as it is best
1308                effort */
1309 
1310             if (!contains(done_rhs, adj)) {
1311                 unordered_map<NFAVertex, NFAVertex> temp_map;
1312                 shared_ptr<NGHolder> new_rhs = make_shared<NGHolder>();
1313                 splitRHS(h, adj, new_rhs.get(), &temp_map);
1314                 remove_edge(new_rhs->start, new_rhs->accept, *new_rhs);
1315                 remove_edge(new_rhs->start, new_rhs->acceptEod, *new_rhs);
1316                 renumber_edges(*new_rhs);
1317                 DEBUG_PRINTF("    into rhs %s (%zu vertices)\n",
1318                              to_string(new_rhs->kind).c_str(),
1319                              num_vertices(*new_rhs));
1320                 done_rhs.emplace(adj, new_rhs);
1321                 assert(isCorrectlyTopped(*new_rhs));
1322             }
1323 
1324             assert(done_rhs[adj].get());
1325             shared_ptr<NGHolder> new_rhs = done_rhs[adj];
1326 
1327             assert(hasCorrectlyNumberedVertices(*new_rhs));
1328             assert(hasCorrectlyNumberedEdges(*new_rhs));
1329             assert(isCorrectlyTopped(*new_rhs));
1330 
1331             if (vg[dest].type == RIV_LITERAL
1332                 && !can_match(*new_rhs, vg[dest].s, true)) {
1333                 continue;
1334             }
1335 
1336             if (out_degree(new_rhs->start, *new_rhs) != 1) {
1337                 add_edge(v, dest, RoseInEdgeProps(new_rhs, 0), vg);
1338             }
1339         }
1340 
1341         remove_edge(ve, vg);
1342     }
1343 }
1344 
1345 static
doNetflowCut(NGHolder & h,const vector<NFAVertexDepth> * depths,RoseInGraph & vg,const vector<RoseInEdge> & ee,bool for_prefix,const Grey & grey,u32 min_allowed_length=0U)1346 bool doNetflowCut(NGHolder &h,
1347                   const vector<NFAVertexDepth> *depths,
1348                   RoseInGraph &vg,
1349                   const vector<RoseInEdge> &ee, bool for_prefix,
1350                   const Grey &grey, u32 min_allowed_length = 0U) {
1351     ENSURE_AT_LEAST(&min_allowed_length, grey.minRoseNetflowLiteralLength);
1352 
1353     DEBUG_PRINTF("doing netflow cut\n");
1354     /* TODO: we should really get literals/scores from the full graph as this
1355      * allows us to overlap with previous cuts. */
1356     assert(!ee.empty());
1357     assert(&h == &*vg[ee.front()].graph);
1358     assert(!for_prefix || depths);
1359 
1360     if (num_edges(h) > grey.maxRoseNetflowEdges) {
1361         /* We have a limit on this because scoring edges and running netflow
1362          * gets very slow for big graphs. */
1363         DEBUG_PRINTF("too many edges, skipping netflow cut\n");
1364         return false;
1365     }
1366 
1367     assert(hasCorrectlyNumberedVertices(h));
1368     assert(hasCorrectlyNumberedEdges(h));
1369 
1370     auto known_bad = poisonEdges(h, depths, vg, ee, for_prefix, grey);
1371 
1372     /* Step 1: Get scores for all edges */
1373     vector<u64a> scores = scoreEdges(h, known_bad); /* scores by edge_index */
1374 
1375     /* Step 2: Find cutset based on scores */
1376     vector<NFAEdge> cut = findMinCut(h, scores);
1377 
1378     /* Step 3: Get literals corresponding to cut edges */
1379     map<NFAEdge, set<ue2_literal>> cut_lits;
1380     for (const auto &e : cut) {
1381         set<ue2_literal> lits = getLiteralSet(h, e);
1382         sanitizeAndCompressAndScore(lits);
1383 
1384         cut_lits[e] = lits;
1385     }
1386 
1387     /* if literals are underlength bail or if it involves a forbidden edge*/
1388     if (!checkValidNetflowLits(h, scores, cut_lits, min_allowed_length)) {
1389         return false;
1390     }
1391     DEBUG_PRINTF("splitting\n");
1392 
1393     /* Step 4: Split graph based on cuts */
1394     splitEdgesByCut(h, vg, ee, cut, cut_lits);
1395 
1396     return true;
1397 }
1398 
1399 static
deanchorIfNeeded(NGHolder & g)1400 bool deanchorIfNeeded(NGHolder &g) {
1401     DEBUG_PRINTF("hi\n");
1402     if (proper_out_degree(g.startDs, g)) {
1403         return false;
1404     }
1405 
1406     /* look for a non-special dot with a loop following start */
1407     set<NFAVertex> succ_g;
1408     insert(&succ_g, adjacent_vertices(g.start, g));
1409     succ_g.erase(g.startDs);
1410 
1411     for (auto v : adjacent_vertices_range(g.start, g)) {
1412         DEBUG_PRINTF("inspecting cand %zu || = %zu\n", g[v].index,
1413                      g[v].char_reach.count());
1414 
1415         if (v == g.startDs || !g[v].char_reach.all()) {
1416             continue;
1417         }
1418 
1419         set<NFAVertex> succ_v;
1420         insert(&succ_v, adjacent_vertices(v, g));
1421 
1422         if (succ_v == succ_g) {
1423             DEBUG_PRINTF("found ^.*\n");
1424             for (auto succ : adjacent_vertices_range(g.start, g)) {
1425                 if (succ == g.startDs) {
1426                     continue;
1427                 }
1428                 add_edge(g.startDs, succ, g);
1429             }
1430             clear_vertex(v, g);
1431             remove_vertex(v, g);
1432             renumber_vertices(g);
1433             return true;
1434         }
1435 
1436         if (succ_g.size() == 1 && hasSelfLoop(v, g)) {
1437             DEBUG_PRINTF("found ^.+\n");
1438             add_edge(g.startDs, v, g);
1439             remove_edge(v, v, g);
1440             return true;
1441         }
1442     }
1443 
1444     return false;
1445 }
1446 
1447 static
populateTrivialGraph(const NGHolder & h)1448 RoseInGraph populateTrivialGraph(const NGHolder &h) {
1449     RoseInGraph g;
1450     shared_ptr<NGHolder> root_g = cloneHolder(h);
1451     bool orig_anch = isAnchored(*root_g);
1452     orig_anch |= deanchorIfNeeded(*root_g);
1453 
1454     DEBUG_PRINTF("orig_anch %d\n", (int)orig_anch);
1455 
1456     auto start = add_vertex(RoseInVertexProps::makeStart(orig_anch), g);
1457     auto accept = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), g);
1458 
1459     add_edge(start, accept, RoseInEdgeProps(root_g, 0), g);
1460 
1461     return g;
1462 }
1463 
1464 static
avoidOutfixes(RoseInGraph & vg,bool last_chance,const CompileContext & cc)1465 void avoidOutfixes(RoseInGraph &vg, bool last_chance,
1466                    const CompileContext &cc) {
1467     STAGE_DEBUG_PRINTF("AVOIDING OUTFIX\n");
1468     assert(num_vertices(vg) == 2);
1469     assert(num_edges(vg) == 1);
1470 
1471     RoseInEdge e = *edges(vg).first;
1472 
1473     NGHolder &h = *vg[e].graph;
1474     assert(isCorrectlyTopped(h));
1475 
1476     renumber_vertices(h);
1477     renumber_edges(h);
1478 
1479     unique_ptr<VertLitInfo>  split = findBestNormalSplit(h, vg, {e}, cc);
1480 
1481     if (split && splitRoseEdge(h, vg, {e}, *split)) {
1482         DEBUG_PRINTF("split on simple literal\n");
1483         return;
1484     }
1485 
1486     if (last_chance) {
1487         /* look for a prefix split as it allows us to accept very weak anchored
1488          * literals. */
1489         auto depths = calcDepths(h);
1490 
1491         split = findBestPrefixSplit(h, depths, vg, {e}, last_chance, cc);
1492 
1493         if (split && splitRoseEdge(h, vg, {e}, *split)) {
1494             DEBUG_PRINTF("split on simple literal\n");
1495             return;
1496         }
1497     }
1498 
1499     doNetflowCut(h, nullptr, vg, {e}, false, cc.grey);
1500 }
1501 
1502 static
removeRedundantPrefixes(RoseInGraph & g)1503 void removeRedundantPrefixes(RoseInGraph &g) {
1504     STAGE_DEBUG_PRINTF("REMOVING REDUNDANT PREFIXES\n");
1505 
1506     for (const RoseInEdge &e : edges_range(g)) {
1507         RoseInVertex s = source(e, g);
1508         RoseInVertex t = target(e, g);
1509 
1510         if (g[s].type != RIV_START || g[t].type != RIV_LITERAL) {
1511             continue;
1512         }
1513 
1514         if (!g[e].graph) {
1515             continue;
1516         }
1517 
1518         assert(!g[t].delay);
1519         const ue2_literal &lit = g[t].s;
1520 
1521         if (!literalIsWholeGraph(*g[e].graph, lit)) {
1522             DEBUG_PRINTF("not whole graph\n");
1523             continue;
1524         }
1525 
1526         if (!isFloating(*g[e].graph)) {
1527             DEBUG_PRINTF("not floating\n");
1528             continue;
1529         }
1530         g[e].graph.reset();
1531     }
1532 }
1533 
1534 static
maxDelay(const CompileContext & cc)1535 u32 maxDelay(const CompileContext &cc) {
1536     if (!cc.streaming) {
1537         return MO_INVALID_IDX;
1538     }
1539     return cc.grey.maxHistoryAvailable;
1540 }
1541 
1542 static
removeRedundantLiteralsFromPrefixes(RoseInGraph & g,const CompileContext & cc)1543 void removeRedundantLiteralsFromPrefixes(RoseInGraph &g,
1544                                          const CompileContext &cc) {
1545     STAGE_DEBUG_PRINTF("REMOVING LITERALS FROM PREFIXES\n");
1546 
1547     vector<RoseInEdge> to_anchor;
1548     for (const RoseInEdge &e : edges_range(g)) {
1549         RoseInVertex s = source(e, g);
1550         RoseInVertex t = target(e, g);
1551 
1552         if (g[s].type != RIV_START && g[s].type != RIV_ANCHORED_START) {
1553             continue;
1554         }
1555 
1556         if (g[t].type != RIV_LITERAL) {
1557             continue;
1558         }
1559 
1560         if (!g[e].graph) {
1561             continue;
1562         }
1563 
1564         if (g[e].graph_lag) {
1565             /* already removed redundant parts of literals */
1566             continue;
1567         }
1568 
1569         if (g[e].dfa) {
1570             /* if we removed any more states, we would need to rebuild the
1571              * the dfa which can be time consuming. */
1572             continue;
1573         }
1574 
1575         assert(!g[t].delay);
1576         const ue2_literal &lit = g[t].s;
1577 
1578         DEBUG_PRINTF("removing states for literal: %s\n",
1579                      dumpString(lit).c_str());
1580 
1581         unique_ptr<NGHolder> h = cloneHolder(*g[e].graph);
1582         const u32 max_delay = maxDelay(cc);
1583 
1584         u32 delay = removeTrailingLiteralStates(*h, lit, max_delay,
1585                                               false /* can't overhang start */);
1586 
1587         DEBUG_PRINTF("got delay %u (max allowed %u)\n", delay, max_delay);
1588 
1589         if (edge(h->startDs, h->accept, *h).second) {
1590             /* we should have delay == lit.length(), but in really complex
1591              * cases we may fail to identify that we can remove the whole
1592              * graph. Regardless, the fact that sds is wired to accept means the
1593              * graph serves no purpose. */
1594             DEBUG_PRINTF("whole graph\n");
1595             g[e].graph.reset();
1596             continue;
1597         }
1598 
1599         if (delay == lit.length() && edge(h->start, h->accept, *h).second
1600             && num_vertices(*h) == N_SPECIALS) {
1601             to_anchor.push_back(e);
1602             continue;
1603         }
1604 
1605         /* if we got here we should still have an interesting graph */
1606         assert(delay == max_delay || num_vertices(*h) > N_SPECIALS);
1607 
1608         if (delay && delay != MO_INVALID_IDX) {
1609             DEBUG_PRINTF("setting delay %u on lhs %p\n", delay, h.get());
1610 
1611             g[e].graph = move(h);
1612             g[e].graph_lag = delay;
1613         }
1614     }
1615 
1616     if (!to_anchor.empty()) {
1617         RoseInVertex anch = add_vertex(RoseInVertexProps::makeStart(true), g);
1618 
1619         for (RoseInEdge e : to_anchor) {
1620             DEBUG_PRINTF("rehoming to anchor\n");
1621             RoseInVertex v = target(e, g);
1622             add_edge(anch, v, g);
1623             remove_edge(e, g);
1624         }
1625     }
1626 }
1627 
1628 static
isStarCliche(const NGHolder & g)1629 bool isStarCliche(const NGHolder &g) {
1630     DEBUG_PRINTF("checking graph with %zu vertices\n", num_vertices(g));
1631 
1632     bool nonspecials_seen = false;
1633 
1634     for (auto v : vertices_range(g)) {
1635         if (is_special(v, g)) {
1636             continue;
1637         }
1638 
1639         if (nonspecials_seen) {
1640             return false;
1641         }
1642         nonspecials_seen = true;
1643 
1644         if (!g[v].char_reach.all()) {
1645             return false;
1646         }
1647 
1648         if (!hasSelfLoop(v, g)) {
1649             return false;
1650         }
1651         if (!edge(v, g.accept, g).second) {
1652             return false;
1653         }
1654     }
1655 
1656     if (!nonspecials_seen) {
1657         return false;
1658     }
1659 
1660     if (!edge(g.start, g.accept, g).second) {
1661         return false;
1662     }
1663 
1664     return true;
1665 }
1666 
1667 static
removeRedundantLiteralsFromInfix(const NGHolder & h,RoseInGraph & ig,const vector<RoseInEdge> & ee,const CompileContext & cc)1668 void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig,
1669                                       const vector<RoseInEdge> &ee,
1670                                       const CompileContext &cc) {
1671     /* TODO: This could be better by not creating a separate graph for each
1672      * successor literal. This would require using distinct report ids and also
1673      * taking into account overlap of successor literals. */
1674 
1675     set<ue2_literal> preds;
1676     set<ue2_literal> succs;
1677     for (const RoseInEdge &e : ee) {
1678         RoseInVertex u = source(e, ig);
1679         assert(ig[u].type == RIV_LITERAL);
1680         assert(!ig[u].delay);
1681         preds.insert(ig[u].s);
1682 
1683         RoseInVertex v = target(e, ig);
1684         assert(ig[v].type == RIV_LITERAL);
1685         assert(!ig[v].delay);
1686         succs.insert(ig[v].s);
1687 
1688         if (ig[e].graph_lag) {
1689             /* already removed redundant parts of literals */
1690             return;
1691         }
1692 
1693         assert(!ig[e].dfa);
1694     }
1695 
1696     map<ue2_literal, pair<shared_ptr<NGHolder>, u32> > graphs; /* + delay */
1697 
1698     for (const ue2_literal &right : succs) {
1699         size_t max_overlap = 0;
1700         for (const ue2_literal &left : preds) {
1701             size_t overlap = maxOverlap(left, right, 0);
1702             ENSURE_AT_LEAST(&max_overlap, overlap);
1703         }
1704 
1705         u32 max_allowed_delay = right.length() - max_overlap;
1706 
1707         if (cc.streaming) {
1708             LIMIT_TO_AT_MOST(&max_allowed_delay, cc.grey.maxHistoryAvailable);
1709         }
1710 
1711         if (!max_allowed_delay) {
1712             continue;
1713         }
1714 
1715         shared_ptr<NGHolder> h_new = cloneHolder(h);
1716 
1717         u32 delay = removeTrailingLiteralStates(*h_new, right,
1718                                                 max_allowed_delay);
1719 
1720         if (delay == MO_INVALID_IDX) {
1721             /* successor literal could not match infix -> ignore false path */
1722             assert(0);
1723             continue;
1724         }
1725 
1726         if (!delay) {
1727             /* unable to trim graph --> no point swapping to new holder */
1728             continue;
1729         }
1730 
1731         assert(isCorrectlyTopped(*h_new));
1732         graphs[right] = make_pair(h_new, delay);
1733     }
1734 
1735     for (const RoseInEdge &e : ee) {
1736         RoseInVertex v = target(e, ig);
1737         const ue2_literal &succ = ig[v].s;
1738         if (!contains(graphs, succ)) {
1739             continue;
1740         }
1741 
1742         ig[e].graph = graphs[succ].first;
1743         ig[e].graph_lag = graphs[succ].second;
1744 
1745         if (isStarCliche(*ig[e].graph)) {
1746             DEBUG_PRINTF("is a X star!\n");
1747             ig[e].graph.reset();
1748             ig[e].graph_lag = 0;
1749         }
1750     }
1751 }
1752 
1753 static
removeRedundantLiteralsFromInfixes(RoseInGraph & g,const CompileContext & cc)1754 void removeRedundantLiteralsFromInfixes(RoseInGraph &g,
1755                                         const CompileContext &cc) {
1756     insertion_ordered_map<NGHolder *, vector<RoseInEdge>> infixes;
1757 
1758     for (const RoseInEdge &e : edges_range(g)) {
1759         RoseInVertex s = source(e, g);
1760         RoseInVertex t = target(e, g);
1761 
1762         if (g[s].type != RIV_LITERAL || g[t].type != RIV_LITERAL) {
1763             continue;
1764         }
1765 
1766         if (!g[e].graph) {
1767             continue;
1768         }
1769 
1770         assert(!g[t].delay);
1771         if (g[e].dfa) {
1772             /* if we removed any more states, we would need to rebuild the
1773              * the dfa which can be time consuming. */
1774             continue;
1775         }
1776 
1777         NGHolder *h = g[e].graph.get();
1778         infixes[h].push_back(e);
1779     }
1780 
1781     for (const auto &m : infixes) {
1782         NGHolder *h = m.first;
1783         const auto &edges = m.second;
1784         removeRedundantLiteralsFromInfix(*h, g, edges, cc);
1785     }
1786 }
1787 
1788 static
removeRedundantLiterals(RoseInGraph & g,const CompileContext & cc)1789 void removeRedundantLiterals(RoseInGraph &g, const CompileContext &cc) {
1790     removeRedundantLiteralsFromPrefixes(g, cc);
1791     removeRedundantLiteralsFromInfixes(g, cc);
1792 }
1793 
1794 static
getStart(RoseInGraph & vg)1795 RoseInVertex getStart(RoseInGraph &vg) {
1796     for (RoseInVertex v : vertices_range(vg)) {
1797         if (vg[v].type == RIV_START || vg[v].type == RIV_ANCHORED_START) {
1798             return v;
1799         }
1800     }
1801     assert(0);
1802     return RoseInGraph::null_vertex();
1803 }
1804 
1805 /**
1806  * Finds the initial accept vertex created to which suffix/outfixes are
1807  * attached.
1808  */
1809 static
getPrimaryAccept(RoseInGraph & vg)1810 RoseInVertex getPrimaryAccept(RoseInGraph &vg) {
1811     for (RoseInVertex v : vertices_range(vg)) {
1812         if (vg[v].type == RIV_ACCEPT && vg[v].reports.empty()) {
1813             return v;
1814         }
1815     }
1816     assert(0);
1817     return RoseInGraph::null_vertex();
1818 }
1819 
1820 static
willBeTransient(const depth & max_depth,const CompileContext & cc)1821 bool willBeTransient(const depth &max_depth, const CompileContext &cc) {
1822     if (!cc.streaming) {
1823         return max_depth <= depth(ROSE_BLOCK_TRANSIENT_MAX_WIDTH);
1824     } else {
1825         return max_depth <= depth(cc.grey.maxHistoryAvailable + 1);
1826     }
1827 }
1828 
1829 static
willBeAnchoredTable(const depth & max_depth,const Grey & grey)1830 bool willBeAnchoredTable(const depth &max_depth, const Grey &grey) {
1831     return max_depth <= depth(grey.maxAnchoredRegion);
1832 }
1833 
1834 static
make_chain(u32 count)1835 unique_ptr<NGHolder> make_chain(u32 count) {
1836     assert(count);
1837 
1838     auto rv = make_unique<NGHolder>(NFA_INFIX);
1839 
1840     NGHolder &h = *rv;
1841 
1842     NFAVertex u = h.start;
1843     for (u32 i = 0; i < count; i++) {
1844         NFAVertex v = add_vertex(h);
1845         h[v].char_reach = CharReach::dot();
1846         add_edge(u, v, h);
1847         u = v;
1848     }
1849     h[u].reports.insert(0);
1850     add_edge(u, h.accept, h);
1851 
1852     setTops(h);
1853 
1854     return rv;
1855 }
1856 
1857 #define SHORT_TRIGGER_LEN 16
1858 
1859 static
makeTransientFromLongLiteral(NGHolder & h,RoseInGraph & vg,const vector<RoseInEdge> & ee,const CompileContext & cc)1860 bool makeTransientFromLongLiteral(NGHolder &h, RoseInGraph &vg,
1861                                   const vector<RoseInEdge> &ee,
1862                                   const CompileContext &cc) {
1863     /* check max width and literal lengths to see if possible */
1864     size_t min_lit = (size_t)~0ULL;
1865     for (const RoseInEdge &e : ee) {
1866         RoseInVertex v = target(e, vg);
1867         LIMIT_TO_AT_MOST(&min_lit, vg[v].s.length());
1868     }
1869 
1870     if (min_lit <= SHORT_TRIGGER_LEN || min_lit >= UINT_MAX) {
1871         return false;
1872     }
1873 
1874     depth max_width = findMaxWidth(h);
1875 
1876     u32 delta = min_lit - SHORT_TRIGGER_LEN;
1877 
1878     if (!willBeTransient(max_width - depth(delta), cc)
1879         && !willBeAnchoredTable(max_width - depth(delta), cc.grey)) {
1880         return false;
1881     }
1882 
1883     DEBUG_PRINTF("candidate for splitting long literal (len %zu)\n", min_lit);
1884     DEBUG_PRINTF("delta = %u\n", delta);
1885 
1886     /* try split */
1887     map<RoseInVertex, shared_ptr<NGHolder> > graphs;
1888     for (const RoseInEdge &e : ee) {
1889         RoseInVertex v = target(e, vg);
1890 
1891         shared_ptr<NGHolder> h_new = cloneHolder(h);
1892 
1893         u32 delay = removeTrailingLiteralStates(*h_new, vg[v].s, delta);
1894 
1895         DEBUG_PRINTF("delay %u\n", delay);
1896 
1897         if (delay != delta) {
1898             DEBUG_PRINTF("unable to trim literal\n");
1899             return false;
1900         }
1901 
1902         if (in_degree(v, vg) != 1) {
1903             DEBUG_PRINTF("complicated\n");
1904             return false;
1905         }
1906 
1907         DEBUG_PRINTF("new mw = %u\n", (u32)findMaxWidth(*h_new));
1908         assert(willBeTransient(findMaxWidth(*h_new), cc)
1909                || willBeAnchoredTable(findMaxWidth(*h_new), cc.grey));
1910 
1911         assert(isCorrectlyTopped(*h_new));
1912         graphs[v] = h_new;
1913     }
1914 
1915     /* add .{repeats} from prefixes to long literals */
1916     for (const RoseInEdge &e : ee) {
1917         RoseInVertex s = source(e, vg);
1918         RoseInVertex t = target(e, vg);
1919 
1920         remove_edge(e, vg);
1921         const ue2_literal &orig_lit = vg[t].s;
1922 
1923         ue2_literal lit(orig_lit.begin(), orig_lit.end() - delta);
1924 
1925         ue2_literal lit2(orig_lit.end() - delta, orig_lit.end());
1926 
1927         assert(lit.length() + delta == orig_lit.length());
1928 
1929         vg[t].s = lit2;
1930 
1931         RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), vg);
1932         add_edge(s, v, RoseInEdgeProps(graphs[t], 0), vg);
1933         add_edge(v, t, RoseInEdgeProps(make_chain(delta), 0), vg);
1934     }
1935 
1936     DEBUG_PRINTF("success\n");
1937     /* TODO: alter split point to avoid pathological splits */
1938     return true;
1939 }
1940 
1941 static
restoreTrailingLiteralStates(NGHolder & g,const ue2_literal & lit,u32 delay,const vector<NFAVertex> & preds)1942 void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
1943                                   u32 delay, const vector<NFAVertex> &preds) {
1944     assert(delay <= lit.length());
1945     assert(isCorrectlyTopped(g));
1946     DEBUG_PRINTF("adding on '%s' %u\n", dumpString(lit).c_str(), delay);
1947 
1948     NFAVertex prev = g.accept;
1949     auto it = lit.rbegin();
1950     while (delay--) {
1951         NFAVertex curr = add_vertex(g);
1952         assert(it != lit.rend());
1953         g[curr].char_reach = *it;
1954         add_edge(curr, prev, g);
1955         ++it;
1956         prev = curr;
1957     }
1958 
1959     for (auto v : preds) {
1960         NFAEdge e = add_edge_if_not_present(v, prev, g);
1961         if (v == g.start && is_triggered(g)) {
1962             g[e].tops.insert(DEFAULT_TOP);
1963         }
1964     }
1965 
1966     // Every predecessor of accept must have a report.
1967     set_report(g, 0);
1968 
1969     renumber_vertices(g);
1970     renumber_edges(g);
1971     assert(allMatchStatesHaveReports(g));
1972     assert(isCorrectlyTopped(g));
1973 }
1974 
1975 static
restoreTrailingLiteralStates(NGHolder & g,const vector<pair<ue2_literal,u32>> & lits)1976 void restoreTrailingLiteralStates(NGHolder &g,
1977                                   const vector<pair<ue2_literal, u32>> &lits) {
1978     vector<NFAVertex> preds;
1979     insert(&preds, preds.end(), inv_adjacent_vertices(g.accept, g));
1980     clear_in_edges(g.accept, g);
1981 
1982     for (auto v : preds) {
1983         g[v].reports.clear(); /* clear report from old accepts */
1984     }
1985 
1986     for (const auto &p : lits) {
1987         const ue2_literal &lit = p.first;
1988         u32 delay = p.second;
1989 
1990         restoreTrailingLiteralStates(g, lit, delay, preds);
1991     }
1992 }
1993 
1994 static
improvePrefix(NGHolder & h,RoseInGraph & vg,const vector<RoseInEdge> & ee,const CompileContext & cc)1995 bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector<RoseInEdge> &ee,
1996                    const CompileContext &cc) {
1997     DEBUG_PRINTF("trying to improve prefix %p, %zu verts\n", &h,
1998                   num_vertices(h));
1999     assert(isCorrectlyTopped(h));
2000 
2001     renumber_vertices(h);
2002     renumber_edges(h);
2003 
2004     auto depths = calcDepths(h);
2005 
2006     /* If the reason the prefix is not transient is due to a very long literal
2007      * following, we can make it transient by restricting ourselves to using
2008      * just the head of the literal. */
2009     if (makeTransientFromLongLiteral(h, vg, ee, cc)) {
2010         return true;
2011     }
2012 
2013     auto split = findBestPrefixSplit(h, depths, vg, ee, false, cc);
2014 
2015     if (split && (split->creates_transient || split->creates_anchored)
2016         && splitRoseEdge(h, vg, ee, *split)) {
2017         DEBUG_PRINTF("split on simple literal\n");
2018         return true;
2019     }
2020 
2021     /* large back edges may prevent us identifing anchored or transient cases
2022      * properly - use a simple walk instead */
2023 
2024     if (doNetflowCut(h, &depths, vg, ee, true, cc.grey)) {
2025         return true;
2026     }
2027 
2028     if (split && splitRoseEdge(h, vg, ee, *split)) {
2029         /* use the simple split even though it doesn't create a transient
2030          * prefix */
2031         DEBUG_PRINTF("split on simple literal\n");
2032         return true;
2033     }
2034 
2035     /* look for netflow cuts which don't produce good prefixes */
2036     if (doNetflowCut(h, &depths, vg, ee, false, cc.grey)) {
2037         return true;
2038     }
2039 
2040     if (ee.size() > 1) {
2041         DEBUG_PRINTF("split the prefix apart based on succ literals\n");
2042         unordered_map<shared_ptr<NGHolder>, vector<pair<RoseInEdge, u32> >,
2043                       NGHolderHasher, NGHolderEqual> trimmed;
2044 
2045         for (const auto &e : ee) {
2046             shared_ptr<NGHolder> hh = cloneHolder(h);
2047             auto succ_lit = vg[target(e, vg)].s;
2048             assert(isCorrectlyTopped(*hh));
2049             u32 delay = removeTrailingLiteralStates(*hh, succ_lit,
2050                                                     succ_lit.length(),
2051                                               false /* can't overhang start */);
2052             if (!delay) {
2053                 DEBUG_PRINTF("could not remove any literal, skip over\n");
2054                 continue;
2055             }
2056 
2057             assert(isCorrectlyTopped(*hh));
2058             trimmed[hh].emplace_back(e, delay);
2059         }
2060 
2061         if (trimmed.size() == 1) {
2062             return false;
2063         }
2064 
2065         /* shift the contents to a vector so we can modify the graphs without
2066          * violating the map's invariants. */
2067         vector<pair<shared_ptr<NGHolder>, vector<pair<RoseInEdge, u32> > > >
2068             trimmed_vec(trimmed.begin(), trimmed.end());
2069         trimmed.clear();
2070         for (auto &elem : trimmed_vec) {
2071             shared_ptr<NGHolder> &hp = elem.first;
2072             vector<pair<ue2_literal, u32>> succ_lits;
2073 
2074             for (const auto &edge_delay : elem.second) {
2075                 const RoseInEdge &e = edge_delay.first;
2076                 u32 delay = edge_delay.second;
2077                 auto lit = vg[target(e, vg)].s;
2078 
2079                 vg[e].graph = hp;
2080                 assert(delay <= lit.length());
2081                 succ_lits.emplace_back(lit, delay);
2082             }
2083             restoreTrailingLiteralStates(*hp, succ_lits);
2084         }
2085         return true;
2086     }
2087 
2088     return false;
2089 }
2090 
2091 #define MAX_FIND_BETTER_PREFIX_GEN   4
2092 #define MAX_FIND_BETTER_PREFIX_COUNT 100
2093 
2094 static
findBetterPrefixes(RoseInGraph & vg,const CompileContext & cc)2095 void findBetterPrefixes(RoseInGraph &vg, const CompileContext &cc) {
2096     STAGE_DEBUG_PRINTF("FIND BETTER PREFIXES\n");
2097     RoseInVertex start = getStart(vg);
2098 
2099     insertion_ordered_map<NGHolder *, vector<RoseInEdge>> prefixes;
2100     bool changed;
2101     u32 gen = 0;
2102     do {
2103         DEBUG_PRINTF("gen %u\n", gen);
2104         changed = false;
2105         prefixes.clear();
2106 
2107         /* find prefixes */
2108         for (const RoseInEdge &e : out_edges_range(start, vg)) {
2109             /* outfixes shouldn't have made it this far */
2110             assert(vg[target(e, vg)].type == RIV_LITERAL);
2111             if (vg[e].graph) {
2112                 NGHolder *h = vg[e].graph.get();
2113                 prefixes[h].push_back(e);
2114             }
2115         }
2116 
2117         if (prefixes.size() > MAX_FIND_BETTER_PREFIX_COUNT) {
2118             break;
2119         }
2120 
2121         /* look for bad prefixes and try to split */
2122         for (const auto &m : prefixes) {
2123             NGHolder *h = m.first;
2124             const auto &edges = m.second;
2125             depth max_width = findMaxWidth(*h);
2126             if (willBeTransient(max_width, cc)
2127                 || willBeAnchoredTable(max_width, cc.grey)) {
2128                 continue;
2129             }
2130 
2131             changed = improvePrefix(*h, vg, edges, cc);
2132         }
2133     } while (changed && gen++ < MAX_FIND_BETTER_PREFIX_GEN);
2134 }
2135 
2136 #define STRONG_LITERAL_LENGTH 20
2137 #define MAX_EXTRACT_STRONG_LITERAL_GRAPHS 10
2138 
2139 static
extractStrongLiteral(NGHolder & h,RoseInGraph & vg,const vector<RoseInEdge> & ee,const CompileContext & cc)2140 bool extractStrongLiteral(NGHolder &h, RoseInGraph &vg,
2141                           const vector<RoseInEdge> &ee,
2142                           const CompileContext &cc) {
2143     DEBUG_PRINTF("looking for string literal\n");
2144     unique_ptr<VertLitInfo> split = findBestNormalSplit(h, vg, ee, cc);
2145 
2146     if (split && min_len(split->lit) >= STRONG_LITERAL_LENGTH) {
2147         DEBUG_PRINTF("splitting simple literal\n");
2148         return splitRoseEdge(h, vg, ee, *split);
2149     }
2150 
2151     return false;
2152 }
2153 
2154 static
extractStrongLiterals(RoseInGraph & vg,const CompileContext & cc)2155 void extractStrongLiterals(RoseInGraph &vg, const CompileContext &cc) {
2156     if (!cc.grey.violetExtractStrongLiterals) {
2157         return;
2158     }
2159 
2160     STAGE_DEBUG_PRINTF("EXTRACT STRONG LITERALS\n");
2161 
2162     unordered_set<NGHolder *> stuck;
2163     insertion_ordered_map<NGHolder *, vector<RoseInEdge>> edges_by_graph;
2164     bool changed;
2165 
2166     do {
2167         changed = false;
2168 
2169         edges_by_graph.clear();
2170         for (const RoseInEdge &ve : edges_range(vg)) {
2171             if (vg[source(ve, vg)].type != RIV_LITERAL) {
2172                 continue;
2173             }
2174 
2175             if (vg[ve].graph) {
2176                 NGHolder *h = vg[ve].graph.get();
2177                 edges_by_graph[h].push_back(ve);
2178             }
2179         }
2180 
2181         if (edges_by_graph.size() > MAX_EXTRACT_STRONG_LITERAL_GRAPHS) {
2182             DEBUG_PRINTF("too many graphs, stopping\n");
2183             return;
2184         }
2185 
2186         for (const auto &m : edges_by_graph) {
2187             NGHolder *g = m.first;
2188             const auto &edges = m.second;
2189             if (contains(stuck, g)) {
2190                 DEBUG_PRINTF("already known to be bad\n");
2191                 continue;
2192             }
2193             bool rv = extractStrongLiteral(*g, vg, edges, cc);
2194             if (rv) {
2195                 changed = true;
2196             } else {
2197                 stuck.insert(g);
2198             }
2199         }
2200     } while (changed);
2201 }
2202 
2203 #define INFIX_STRONG_GUARD_LEN 8
2204 #define INFIX_MIN_SPLIT_LITERAL_LEN 12
2205 
2206 static
improveInfix(NGHolder & h,RoseInGraph & vg,const vector<RoseInEdge> & ee,const CompileContext & cc)2207 bool improveInfix(NGHolder &h, RoseInGraph &vg, const vector<RoseInEdge> &ee,
2208                   const CompileContext &cc) {
2209     unique_ptr<VertLitInfo> split = findBestNormalSplit(h, vg, ee, cc);
2210 
2211     if (split && min_len(split->lit) >= INFIX_MIN_SPLIT_LITERAL_LEN
2212         && splitRoseEdge(h, vg, ee, *split)) {
2213         DEBUG_PRINTF("splitting simple literal\n");
2214         return true;
2215     }
2216 
2217     DEBUG_PRINTF("trying for a netflow cut\n");
2218     /* look for netflow cuts which don't produce good prefixes */
2219     bool rv = doNetflowCut(h, nullptr, vg, ee, false, cc.grey, 8);
2220 
2221     DEBUG_PRINTF("did netfow cut? = %d\n", (int)rv);
2222 
2223     return rv;
2224 }
2225 
2226 /**
2227  * Infixes which are weakly guarded can, in effect, act like prefixes as they
2228  * will often be live. We should try to split these infixes further if they
2229  * contain strong literals so that we are at least running smaller weak infixes
2230  * which can hopeful be accelerated/miracled.
2231  */
2232 static
improveWeakInfixes(RoseInGraph & vg,const CompileContext & cc)2233 void improveWeakInfixes(RoseInGraph &vg, const CompileContext &cc) {
2234     if (!cc.grey.violetAvoidWeakInfixes) {
2235         return;
2236     }
2237     STAGE_DEBUG_PRINTF("IMPROVE WEAK INFIXES\n");
2238 
2239     RoseInVertex start = getStart(vg);
2240 
2241     unordered_set<NGHolder *> weak;
2242 
2243     for (RoseInVertex vv : adjacent_vertices_range(start, vg)) {
2244         /* outfixes shouldn't have made it this far */
2245         assert(vg[vv].type == RIV_LITERAL);
2246         if (vg[vv].s.length() >= INFIX_STRONG_GUARD_LEN) {
2247             continue;
2248         }
2249 
2250         for (const RoseInEdge &e : out_edges_range(vv, vg)) {
2251             if (vg[target(e, vg)].type != RIV_LITERAL || !vg[e].graph) {
2252                 continue;
2253             }
2254 
2255             NGHolder *h = vg[e].graph.get();
2256             DEBUG_PRINTF("'%s' guards %p\n", dumpString(vg[vv].s).c_str(), h);
2257             weak.insert(h);
2258         }
2259     }
2260 
2261     insertion_ordered_map<NGHolder *, vector<RoseInEdge>> weak_edges;
2262     for (const RoseInEdge &ve : edges_range(vg)) {
2263         NGHolder *h = vg[ve].graph.get();
2264         if (contains(weak, h)) {
2265             weak_edges[h].push_back(ve);
2266         }
2267     }
2268 
2269     for (const auto &m : weak_edges) {
2270         NGHolder *h = m.first;
2271         const auto &edges = m.second;
2272         improveInfix(*h, vg, edges, cc);
2273     }
2274 }
2275 
2276 static
splitEdgesForSuffix(const NGHolder & base_graph,RoseInGraph & vg,const vector<RoseInEdge> & ee,const VertLitInfo & split,bool eod,const flat_set<ReportID> & reports)2277 void splitEdgesForSuffix(const NGHolder &base_graph, RoseInGraph &vg,
2278                          const vector<RoseInEdge> &ee, const VertLitInfo &split,
2279                          bool eod, const flat_set<ReportID> &reports) {
2280     const vector<NFAVertex> &splitters = split.vv;
2281     assert(!splitters.empty());
2282 
2283     shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
2284     unordered_map<NFAVertex, NFAVertex> v_map;
2285     cloneHolder(*lhs, base_graph, &v_map);
2286     lhs->kind = NFA_INFIX;
2287     clear_in_edges(lhs->accept, *lhs);
2288     clear_in_edges(lhs->acceptEod, *lhs);
2289     add_edge(lhs->accept, lhs->acceptEod, *lhs);
2290     clearReports(*lhs);
2291     for (NFAVertex v : splitters) {
2292         NFAEdge e = add_edge(v_map[v], lhs->accept, *lhs);
2293         if (v == base_graph.start) {
2294             (*lhs)[e].tops.insert(DEFAULT_TOP);
2295         }
2296         (*lhs)[v_map[v]].reports.insert(0);
2297 
2298     }
2299     pruneUseless(*lhs);
2300     assert(isCorrectlyTopped(*lhs));
2301 
2302     /* create literal vertices and connect preds */
2303     for (const auto &lit : split.lit) {
2304         if (!can_match(*lhs, lit, is_triggered(*lhs))) {
2305             continue;
2306         }
2307 
2308         DEBUG_PRINTF("best is '%s'\n", escapeString(lit).c_str());
2309         RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), vg);
2310 
2311         RoseInVertex tt;
2312         if (eod) {
2313             DEBUG_PRINTF("doing eod\n");
2314             tt = add_vertex(RoseInVertexProps::makeAcceptEod(reports), vg);
2315         } else {
2316             DEBUG_PRINTF("doing non-eod\n");
2317             tt = add_vertex(RoseInVertexProps::makeAccept(reports), vg);
2318         }
2319         add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg);
2320 
2321         for (const RoseInEdge &e : ee) {
2322             RoseInVertex u = source(e, vg);
2323             assert(!edge(u, v, vg).second);
2324             add_edge(u, v, RoseInEdgeProps(lhs, 0U), vg);
2325         }
2326     }
2327 }
2328 
2329 #define MIN_SUFFIX_LEN 6
2330 
2331 static
replaceSuffixWithInfix(const NGHolder & h,RoseInGraph & vg,const vector<RoseInEdge> & suffix_edges,const CompileContext & cc)2332 bool replaceSuffixWithInfix(const NGHolder &h, RoseInGraph &vg,
2333                             const vector<RoseInEdge> &suffix_edges,
2334                             const CompileContext &cc) {
2335     DEBUG_PRINTF("inspecting suffix : %p on %zu edges\n", &h,
2336                  suffix_edges.size());
2337     /*
2338      * We would, in general, rather not have output exposed engines because
2339      * once they are triggered, they must be run while infixes only have to run
2340      * if the successor literal is seen. Matches from output exposed engines
2341      * also have to be placed in a priority queue and interleaved with matches
2342      * from other sources.
2343      *
2344      * Note:
2345      * - if the LHS is extremely unlikely we may be better off leaving
2346      *   a suffix unguarded.
2347      *
2348      * - limited width suffixes may be less bad as they won't be continuously
2349      *   active, we may want to have (a) stronger controls on if we want to pick
2350      *   a trailing literal in these cases and/or (b) look also for literals
2351      *   near accept as well as right on accept
2352      *
2353      * TODO: improve heuristics, splitting logic.
2354      */
2355 
2356     /* we may do multiple splits corresponding to different report behaviour */
2357     set<NFAVertex> seen;
2358     map<pair<bool, flat_set<ReportID> >, VertLitInfo> by_reports; /* eod, rep */
2359 
2360     for (NFAVertex v : inv_adjacent_vertices_range(h.accept, h)) {
2361         set<ue2_literal> ss = getLiteralSet(h, v, false);
2362         if (ss.empty()) {
2363             DEBUG_PRINTF("candidate is too shitty\n");
2364             return false;
2365         }
2366 
2367         VertLitInfo &vli = by_reports[make_pair(false, h[v].reports)];
2368         insert(&vli.lit, ss);
2369         vli.vv.push_back(v);
2370         seen.insert(v);
2371     }
2372 
2373     seen.insert(h.accept);
2374     for (NFAVertex v : inv_adjacent_vertices_range(h.acceptEod, h)) {
2375         if (contains(seen, v)) {
2376             continue;
2377         }
2378 
2379         set<ue2_literal> ss = getLiteralSet(h, v, false);
2380         if (ss.empty()) {
2381             DEBUG_PRINTF("candidate is too shitty\n");
2382             return false;
2383         }
2384 
2385         VertLitInfo &vli = by_reports[make_pair(true, h[v].reports)];
2386         insert(&vli.lit, ss);
2387         vli.vv.push_back(v);
2388     }
2389 
2390     assert(!by_reports.empty());
2391 
2392     /* TODO: how strong a min len do we want here ? */
2393     u32 min_len = cc.grey.minRoseLiteralLength;
2394     ENSURE_AT_LEAST(&min_len, MIN_SUFFIX_LEN);
2395 
2396     for (auto &vli : by_reports | map_values) {
2397         u64a score = sanitizeAndCompressAndScore(vli.lit);
2398 
2399         if (vli.lit.empty()
2400             || !validateRoseLiteralSetQuality(vli.lit, score, false, min_len,
2401                                               false, false)) {
2402             return false;
2403         }
2404     }
2405 
2406     for (const auto &info : by_reports) {
2407         DEBUG_PRINTF("splitting on simple literals\n");
2408         splitEdgesForSuffix(h, vg, suffix_edges, info.second,
2409                             info.first.first /* eod */,
2410                             info.first.second /* reports */);
2411     }
2412 
2413     for (const RoseInEdge &e : suffix_edges) {
2414         remove_edge(e, vg);
2415     }
2416     return true;
2417 }
2418 
2419 static
avoidSuffixes(RoseInGraph & vg,const CompileContext & cc)2420 void avoidSuffixes(RoseInGraph &vg, const CompileContext &cc) {
2421     if (!cc.grey.violetAvoidSuffixes) {
2422         return;
2423     }
2424 
2425     STAGE_DEBUG_PRINTF("AVOID SUFFIXES\n");
2426 
2427     RoseInVertex accept = getPrimaryAccept(vg);
2428 
2429     insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> suffixes;
2430 
2431     /* find suffixes */
2432     for (const RoseInEdge &e : in_edges_range(accept, vg)) {
2433         /* outfixes shouldn't have made it this far */
2434         assert(vg[source(e, vg)].type == RIV_LITERAL);
2435         assert(vg[e].graph); /* non suffix paths should be wired to other
2436                                 accepts */
2437         const NGHolder *h = vg[e].graph.get();
2438         suffixes[h].push_back(e);
2439     }
2440 
2441     /* look at suffixes and try to split */
2442     for (const auto &m : suffixes) {
2443         const NGHolder *h = m.first;
2444         const auto &edges = m.second;
2445         replaceSuffixWithInfix(*h, vg, edges, cc);
2446     }
2447 }
2448 
2449 static
leadingDotStartLiteral(const NGHolder & h,VertLitInfo * out)2450 bool leadingDotStartLiteral(const NGHolder &h, VertLitInfo *out) {
2451     if (out_degree(h.start, h) != 3) {
2452         return false;
2453     }
2454 
2455     NFAVertex v = NGHolder::null_vertex();
2456     NFAVertex ds = NGHolder::null_vertex();
2457 
2458     for (NFAVertex a : adjacent_vertices_range(h.start, h)) {
2459         if (a == h.startDs) {
2460             continue;
2461         }
2462         if (h[a].char_reach.all()) {
2463             ds = a;
2464             if (out_degree(ds, h) != 2 || !edge(ds, ds, h).second) {
2465                 return false;
2466             }
2467         } else {
2468             v = a;
2469         }
2470     }
2471 
2472     if (!v || !ds || !edge(ds, v, h).second) {
2473         return false;
2474     }
2475 
2476     if (h[v].char_reach.count() != 1 && !h[v].char_reach.isCaselessChar()) {
2477         return false;
2478     }
2479 
2480     ue2_literal lit;
2481     lit.push_back(h[v].char_reach.find_first(),
2482                   h[v].char_reach.isCaselessChar());
2483     while (out_degree(v, h) == 1) {
2484         NFAVertex vv = *adjacent_vertices(v, h).first;
2485         if (h[vv].char_reach.count() != 1
2486             && !h[vv].char_reach.isCaselessChar()) {
2487             break;
2488         }
2489 
2490         v = vv;
2491 
2492         lit.push_back(h[v].char_reach.find_first(),
2493                       h[v].char_reach.isCaselessChar());
2494     }
2495 
2496     if (is_match_vertex(v, h) && h.kind != NFA_SUFFIX) {
2497         /* we have rediscovered the post-infix literal */
2498         return false;
2499     }
2500 
2501     if (bad_mixed_sensitivity(lit)) {
2502         make_nocase(&lit);
2503     }
2504 
2505     DEBUG_PRINTF("%zu found %s\n", h[v].index, dumpString(lit).c_str());
2506     out->vv = {v};
2507     out->lit = {lit};
2508     return true;
2509 }
2510 
2511 static
lookForDoubleCut(const NGHolder & h,const vector<RoseInEdge> & ee,RoseInGraph & vg,const Grey & grey)2512 bool lookForDoubleCut(const NGHolder &h, const vector<RoseInEdge> &ee,
2513                       RoseInGraph &vg, const Grey &grey) {
2514     VertLitInfo info;
2515     if (!leadingDotStartLiteral(h, &info)
2516         || min_len(info.lit) < grey.violetDoubleCutLiteralLen) {
2517         return false;
2518     }
2519     DEBUG_PRINTF("performing split\n");
2520     return splitRoseEdge(h, vg, ee, {info});
2521 }
2522 
2523 static
lookForDoubleCut(RoseInGraph & vg,const CompileContext & cc)2524 void lookForDoubleCut(RoseInGraph &vg, const CompileContext &cc) {
2525     if (!cc.grey.violetDoubleCut) {
2526         return;
2527     }
2528 
2529     insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> right_edges;
2530     for (const RoseInEdge &ve : edges_range(vg)) {
2531         if (vg[ve].graph && vg[source(ve, vg)].type == RIV_LITERAL) {
2532             const NGHolder *h = vg[ve].graph.get();
2533             right_edges[h].push_back(ve);
2534         }
2535     }
2536 
2537     for (const auto &m : right_edges) {
2538         const NGHolder *h = m.first;
2539         const auto &edges = m.second;
2540         lookForDoubleCut(*h, edges, vg, cc.grey);
2541     }
2542 }
2543 
2544 static
findLiteralBefore(const NGHolder & h,NFAVertex v)2545 pair<NFAVertex, ue2_literal> findLiteralBefore(const NGHolder &h, NFAVertex v) {
2546     ue2_literal lit;
2547     if (h[v].char_reach.count() != 1 && !h[v].char_reach.isCaselessChar()) {
2548         return {v, std::move(lit) };
2549     }
2550     lit.push_back(h[v].char_reach.find_first(),
2551                   h[v].char_reach.isCaselessChar());
2552 
2553     while (in_degree(v, h) == 1) {
2554         NFAVertex vv = *inv_adjacent_vertices(v, h).first;
2555         if (h[vv].char_reach.count() != 1
2556             && !h[vv].char_reach.isCaselessChar()) {
2557             break;
2558         }
2559 
2560         lit.push_back(h[vv].char_reach.find_first(),
2561                       h[vv].char_reach.isCaselessChar());
2562         v = vv;
2563     }
2564 
2565     return {v, std::move(lit) };
2566 }
2567 
2568 static
lookForDotStarPred(NFAVertex v,const NGHolder & h,NFAVertex * u,NFAVertex * ds)2569 bool lookForDotStarPred(NFAVertex v, const NGHolder &h,
2570                         NFAVertex *u, NFAVertex *ds) {
2571     *u = NGHolder::null_vertex();
2572     *ds = NGHolder::null_vertex();
2573     for (NFAVertex a : inv_adjacent_vertices_range(v, h)) {
2574         if (h[a].char_reach.all()) {
2575             if (!edge(a, a, h).second) {
2576                 return false;
2577             }
2578 
2579             if (*ds) {
2580                 return false;
2581             }
2582 
2583             *ds = a;
2584         } else {
2585             if (*u) {
2586                 return false;
2587             }
2588             *u = a;
2589         }
2590     }
2591 
2592     if (!*u || !*ds) {
2593         return false;
2594     }
2595 
2596     return true;
2597 }
2598 
2599 static
trailingDotStarLiteral(const NGHolder & h,VertLitInfo * out)2600 bool trailingDotStarLiteral(const NGHolder &h, VertLitInfo *out) {
2601     /* Note: there is no delay yet - so the final literal is the already
2602      * discovered successor literal - we are in fact interested in the literal
2603      * before it. */
2604 
2605     if (in_degree(h.accept, h) != 1) {
2606         return false;
2607     }
2608 
2609     if (in_degree(h.acceptEod, h) != 1) {
2610         assert(0);
2611         return false;
2612     }
2613 
2614     NFAVertex v
2615         = findLiteralBefore(h, *inv_adjacent_vertices(h.accept, h).first).first;
2616 
2617     NFAVertex u;
2618     NFAVertex ds;
2619 
2620     if (!lookForDotStarPred(v, h, &u, &ds)) {
2621         return false;
2622     }
2623 
2624     v = u;
2625     auto rv = findLiteralBefore(h, v);
2626 
2627     if (!lookForDotStarPred(v, h, &u, &ds)) {
2628         return false;
2629     }
2630 
2631     ue2_literal lit = reverse_literal(rv.second);
2632     DEBUG_PRINTF("%zu found %s\n", h[v].index, dumpString(lit).c_str());
2633 
2634     if (bad_mixed_sensitivity(lit)) {
2635         make_nocase(&lit);
2636     }
2637 
2638     out->vv = {v};
2639     out->lit = {lit};
2640     return true;
2641 }
2642 
2643 static
lookForTrailingLiteralDotStar(const NGHolder & h,const vector<RoseInEdge> & ee,RoseInGraph & vg,const Grey & grey)2644 bool lookForTrailingLiteralDotStar(const NGHolder &h,
2645                                    const vector<RoseInEdge> &ee,
2646                                    RoseInGraph &vg, const Grey &grey) {
2647     VertLitInfo info;
2648     if (!trailingDotStarLiteral(h, &info)
2649         || min_len(info.lit) < grey.violetDoubleCutLiteralLen) {
2650         return false;
2651     }
2652     DEBUG_PRINTF("performing split\n");
2653     return splitRoseEdge(h, vg, ee, info);
2654 }
2655 
2656 /* In streaming mode, active engines have to be caught up at stream boundaries
2657  * and have to be stored in stream state, so we prefer to decompose patterns
2658  * in to literals with no state between them if possible. */
2659 static
decomposeLiteralChains(RoseInGraph & vg,const CompileContext & cc)2660 void decomposeLiteralChains(RoseInGraph &vg, const CompileContext &cc) {
2661     if (!cc.grey.violetLiteralChains) {
2662         return;
2663     }
2664 
2665     insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> right_edges;
2666     bool changed;
2667     do {
2668         changed = false;
2669 
2670         right_edges.clear();
2671         for (const RoseInEdge &ve : edges_range(vg)) {
2672             if (vg[ve].graph && vg[source(ve, vg)].type == RIV_LITERAL) {
2673                 const NGHolder *h = vg[ve].graph.get();
2674                 right_edges[h].push_back(ve);
2675             }
2676         }
2677 
2678         for (const auto &m : right_edges) {
2679             const NGHolder *h = m.first;
2680             const vector<RoseInEdge> &ee = m.second;
2681             bool rv = lookForDoubleCut(*h, ee, vg, cc.grey);
2682             if (!rv && h->kind != NFA_SUFFIX) {
2683                 rv = lookForTrailingLiteralDotStar(*h, ee, vg, cc.grey);
2684             }
2685             changed |= rv;
2686         }
2687     } while (changed);
2688 }
2689 
2690 static
lookForCleanSplit(const NGHolder & h,const vector<RoseInEdge> & ee,RoseInGraph & vg,const CompileContext & cc)2691 bool lookForCleanSplit(const NGHolder &h, const vector<RoseInEdge> &ee,
2692                        RoseInGraph &vg, const CompileContext &cc) {
2693     unique_ptr<VertLitInfo> split = findBestCleanSplit(h, cc);
2694 
2695     if (split) {
2696         return splitRoseEdge(h, vg, {ee}, *split);
2697     }
2698 
2699     return false;
2700 }
2701 
2702 #define MAX_DESIRED_CLEAN_SPLIT_DEPTH 4
2703 
2704 static
lookForCleanEarlySplits(RoseInGraph & vg,const CompileContext & cc)2705 void lookForCleanEarlySplits(RoseInGraph &vg, const CompileContext &cc) {
2706     u32 gen = 0;
2707 
2708     insertion_ordered_set<RoseInVertex> prev({getStart(vg)});
2709     insertion_ordered_set<RoseInVertex> curr;
2710 
2711     while (gen < MAX_DESIRED_CLEAN_SPLIT_DEPTH) {
2712         curr.clear();
2713         for (RoseInVertex u : prev) {
2714             for (auto v : adjacent_vertices_range(u, vg)) {
2715                 curr.insert(v);
2716             }
2717         }
2718 
2719         insertion_ordered_map<const NGHolder *, vector<RoseInEdge>> rightfixes;
2720         for (RoseInVertex v : curr) {
2721             for (const RoseInEdge &e : out_edges_range(v, vg)) {
2722                 if (vg[e].graph) {
2723                     NGHolder *h = vg[e].graph.get();
2724                     rightfixes[h].push_back(e);
2725                 }
2726             }
2727         }
2728 
2729         for (const auto &m : rightfixes) {
2730             const NGHolder *h = m.first;
2731             const auto &edges = m.second;
2732             lookForCleanSplit(*h, edges, vg, cc);
2733         }
2734 
2735         prev = std::move(curr);
2736         gen++;
2737     }
2738 }
2739 
2740 static
rehomeEodSuffixes(RoseInGraph & vg)2741 void rehomeEodSuffixes(RoseInGraph &vg) {
2742     // Find edges to accept with EOD-anchored graphs that we can move over to
2743     // acceptEod.
2744     vector<RoseInEdge> acc_edges;
2745     for (const auto &e : edges_range(vg)) {
2746         if (vg[target(e, vg)].type != RIV_ACCEPT) {
2747             continue;
2748         }
2749         if (vg[e].haig || !vg[e].graph) {
2750             continue;
2751         }
2752 
2753         const NGHolder &h = *vg[e].graph;
2754 
2755         if (in_degree(h.accept, h)) {
2756             DEBUG_PRINTF("graph isn't eod anchored\n");
2757             continue;
2758         }
2759 
2760         acc_edges.push_back(e);
2761     }
2762 
2763     for (const RoseInEdge &e : acc_edges) {
2764         // Move this edge from accept to acceptEod
2765         RoseInVertex w = add_vertex(RoseInVertexProps::makeAcceptEod(), vg);
2766         add_edge(source(e, vg), w, vg[e], vg);
2767         remove_edge(e, vg);
2768     }
2769 
2770     /* old accept vertices will be tidied up by final pruneUseless() call */
2771 }
2772 
2773 static
tryForEarlyDfa(const NGHolder & h,const CompileContext & cc)2774 bool tryForEarlyDfa(const NGHolder &h, const CompileContext &cc) {
2775     switch (h.kind) {
2776     case NFA_OUTFIX: /* 'prefix' of eod */
2777     case NFA_PREFIX:
2778         return cc.grey.earlyMcClellanPrefix;
2779     case NFA_INFIX:
2780         return cc.grey.earlyMcClellanInfix;
2781     case NFA_SUFFIX:
2782         return cc.grey.earlyMcClellanSuffix;
2783     default:
2784         DEBUG_PRINTF("kind %u\n", (u32)h.kind);
2785         assert(0);
2786         return false;
2787     }
2788 }
2789 
2790 static
getDfaTriggers(RoseInGraph & vg,const vector<RoseInEdge> & edges,bool * single_trigger)2791 vector<vector<CharReach>> getDfaTriggers(RoseInGraph &vg,
2792                                          const vector<RoseInEdge> &edges,
2793                                          bool *single_trigger) {
2794     vector<vector<CharReach>> triggers;
2795     u32 min_offset = ~0U;
2796     u32 max_offset = 0;
2797     for (const auto &e : edges) {
2798         RoseInVertex s = source(e, vg);
2799         if (vg[s].type == RIV_LITERAL) {
2800             triggers.push_back(as_cr_seq(vg[s].s));
2801         }
2802         ENSURE_AT_LEAST(&max_offset, vg[s].max_offset);
2803         LIMIT_TO_AT_MOST(&min_offset, vg[s].min_offset);
2804     }
2805 
2806     *single_trigger = min_offset == max_offset;
2807     DEBUG_PRINTF("trigger offset (%u, %u)\n", min_offset, max_offset);
2808 
2809     return triggers;
2810 }
2811 
2812 static
doEarlyDfa(RoseBuild & rose,RoseInGraph & vg,NGHolder & h,const vector<RoseInEdge> & edges,bool final_chance,const ReportManager & rm,const CompileContext & cc)2813 bool doEarlyDfa(RoseBuild &rose, RoseInGraph &vg, NGHolder &h,
2814                 const vector<RoseInEdge> &edges, bool final_chance,
2815                 const ReportManager &rm, const CompileContext &cc) {
2816     DEBUG_PRINTF("trying for dfa\n");
2817 
2818     bool single_trigger;
2819     for (const auto &e : edges) {
2820         if (vg[target(e, vg)].type == RIV_ACCEPT_EOD) {
2821             /* TODO: support eod prefixes */
2822             return false;
2823         }
2824     }
2825 
2826     auto triggers = getDfaTriggers(vg, edges, &single_trigger);
2827 
2828     /* TODO: literal delay things */
2829     if (!generates_callbacks(h)) {
2830         set_report(h, rose.getNewNfaReport());
2831     }
2832 
2833     shared_ptr<raw_dfa> dfa = buildMcClellan(h, &rm, single_trigger, triggers,
2834                                              cc.grey, final_chance);
2835 
2836     if (!dfa) {
2837         return false;
2838     }
2839 
2840     DEBUG_PRINTF("dfa ok\n");
2841     for (const auto &e : edges) {
2842         vg[e].dfa = dfa;
2843     }
2844 
2845     return true;
2846 }
2847 
2848 #define MAX_EDGES_FOR_IMPLEMENTABILITY 50
2849 
2850 static
splitForImplementability(RoseInGraph & vg,NGHolder & h,const vector<RoseInEdge> & edges,const CompileContext & cc)2851 bool splitForImplementability(RoseInGraph &vg, NGHolder &h,
2852                               const vector<RoseInEdge> &edges,
2853                               const CompileContext &cc) {
2854     vector<pair<ue2_literal, u32>> succ_lits;
2855     DEBUG_PRINTF("trying to split %s with %zu vertices on %zu edges\n",
2856                   to_string(h.kind).c_str(), num_vertices(h), edges.size());
2857 
2858     if (edges.size() > MAX_EDGES_FOR_IMPLEMENTABILITY) {
2859         return false;
2860     }
2861 
2862     if (!generates_callbacks(h)) {
2863         for (const auto &e : edges) {
2864             const auto &lit = vg[target(e, vg)].s;
2865             u32 delay = vg[e].graph_lag;
2866             vg[e].graph_lag = 0;
2867 
2868             assert(delay <= lit.length());
2869             succ_lits.emplace_back(lit, delay);
2870         }
2871         restoreTrailingLiteralStates(h, succ_lits);
2872     }
2873 
2874     unique_ptr<VertLitInfo> split;
2875     bool last_chance = true;
2876     if (h.kind == NFA_PREFIX) {
2877         auto depths = calcDepths(h);
2878 
2879         split = findBestPrefixSplit(h, depths, vg, edges, last_chance, cc);
2880     } else {
2881         split = findBestLastChanceSplit(h, vg, edges, cc);
2882     }
2883 
2884     if (split && splitRoseEdge(h, vg, edges, *split)) {
2885         DEBUG_PRINTF("split on simple literal\n");
2886         return true;
2887     }
2888 
2889     DEBUG_PRINTF("trying to netflow\n");
2890     bool rv = doNetflowCut(h, nullptr, vg, edges, false, cc.grey);
2891     DEBUG_PRINTF("done\n");
2892 
2893     return rv;
2894 }
2895 
2896 #define MAX_IMPLEMENTABLE_SPLITS 50
2897 
ensureImplementable(RoseBuild & rose,RoseInGraph & vg,bool allow_changes,bool final_chance,const ReportManager & rm,const CompileContext & cc)2898 bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes,
2899                          bool final_chance, const ReportManager &rm,
2900                          const CompileContext &cc) {
2901     DEBUG_PRINTF("checking for impl %d\n", final_chance);
2902     bool changed = false;
2903     bool need_to_recalc = false;
2904     u32 added_count = 0;
2905     unordered_set<shared_ptr<NGHolder>> good; /* known to be implementable */
2906     do {
2907         changed = false;
2908         DEBUG_PRINTF("added %u\n", added_count);
2909         insertion_ordered_map<shared_ptr<NGHolder>,
2910                               vector<RoseInEdge>> edges_by_graph;
2911         for (const RoseInEdge &ve : edges_range(vg)) {
2912             if (vg[ve].graph && !vg[ve].dfa) {
2913                 auto &h = vg[ve].graph;
2914                 edges_by_graph[h].push_back(ve);
2915             }
2916         }
2917         for (auto &m : edges_by_graph) {
2918             auto &h = m.first;
2919             if (contains(good, h)) {
2920                 continue;
2921             }
2922             reduceGraphEquivalences(*h, cc);
2923             if (isImplementableNFA(*h, &rm, cc)) {
2924                 good.insert(h);
2925                 continue;
2926             }
2927 
2928             const auto &edges = m.second;
2929 
2930             if (tryForEarlyDfa(*h, cc) &&
2931                 doEarlyDfa(rose, vg, *h, edges, final_chance, rm, cc)) {
2932                 continue;
2933             }
2934 
2935             DEBUG_PRINTF("eek\n");
2936             if (!allow_changes) {
2937                 return false;
2938             }
2939 
2940             if (splitForImplementability(vg, *h, edges, cc)) {
2941                 added_count++;
2942                 if (added_count > MAX_IMPLEMENTABLE_SPLITS) {
2943                     DEBUG_PRINTF("added_count hit limit\n");
2944                     return false;
2945                 }
2946                 changed = true;
2947                 continue;
2948             }
2949 
2950             return false;
2951         }
2952 
2953         assert(added_count <= MAX_IMPLEMENTABLE_SPLITS);
2954 
2955         if (changed) {
2956             removeRedundantLiterals(vg, cc);
2957             pruneUseless(vg);
2958             need_to_recalc = true;
2959         }
2960     } while (changed);
2961 
2962     if (need_to_recalc) {
2963         renumber_vertices(vg);
2964         calcVertexOffsets(vg);
2965     }
2966 
2967     DEBUG_PRINTF("ok!\n");
2968     return true;
2969 }
2970 
2971 static
doInitialVioletTransform(const NGHolder & h,bool last_chance,const CompileContext & cc)2972 RoseInGraph doInitialVioletTransform(const NGHolder &h, bool last_chance,
2973                                      const CompileContext &cc) {
2974     assert(!can_never_match(h));
2975 
2976     RoseInGraph vg = populateTrivialGraph(h);
2977 
2978     if (!cc.grey.allowViolet) {
2979         return vg;
2980     }
2981 
2982     /* Avoid running the Violet analysis at all on graphs with no vertices with
2983      * small reach, since we will not be able to extract any literals. */
2984     if (!hasNarrowReachVertex(h)) {
2985         DEBUG_PRINTF("fail, no vertices with small reach\n");
2986         return vg;
2987     }
2988 
2989     DEBUG_PRINTF("hello world\n");
2990 
2991     /* Step 1: avoid outfixes as we always have to run them. */
2992     avoidOutfixes(vg, last_chance, cc);
2993 
2994     if (num_vertices(vg) <= 2) {
2995         return vg; /* unable to transform pattern */
2996     }
2997 
2998     removeRedundantPrefixes(vg);
2999     dumpPreRoseGraph(vg, cc.grey, "pre_prefix_rose.dot");
3000 
3001     /* Step 2: avoid non-transient prefixes (esp in streaming mode) */
3002     findBetterPrefixes(vg, cc);
3003 
3004     dumpPreRoseGraph(vg, cc.grey, "post_prefix_rose.dot");
3005 
3006     extractStrongLiterals(vg, cc);
3007     dumpPreRoseGraph(vg, cc.grey, "post_extract_rose.dot");
3008     improveWeakInfixes(vg, cc);
3009     dumpPreRoseGraph(vg, cc.grey, "post_infix_rose.dot");
3010 
3011     /* Step 3: avoid output exposed engines if there is a strong trailing
3012        literal) */
3013     avoidSuffixes(vg, cc);
3014 
3015     /* Step 4: look for infixes/suffixes with leading .*literals
3016      * This can reduce the amount of work a heavily picked literal has to do and
3017      * reduce the amount of state used as .* is handled internally to rose. */
3018     lookForDoubleCut(vg, cc);
3019 
3020     if (cc.streaming) {
3021         lookForCleanEarlySplits(vg, cc);
3022         decomposeLiteralChains(vg, cc);
3023     }
3024 
3025     rehomeEodSuffixes(vg);
3026     removeRedundantLiterals(vg, cc);
3027 
3028     pruneUseless(vg);
3029     dumpPreRoseGraph(vg, cc.grey);
3030     renumber_vertices(vg);
3031     calcVertexOffsets(vg);
3032 
3033     return vg;
3034 }
3035 
doViolet(RoseBuild & rose,const NGHolder & h,bool prefilter,bool last_chance,const ReportManager & rm,const CompileContext & cc)3036 bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter,
3037               bool last_chance, const ReportManager &rm,
3038               const CompileContext &cc) {
3039     auto vg = doInitialVioletTransform(h, last_chance, cc);
3040     if (num_vertices(vg) <= 2) {
3041         return false;
3042     }
3043 
3044     /* Step 5: avoid unimplementable, or overly large engines if possible */
3045     if (!ensureImplementable(rose, vg, last_chance, last_chance, rm, cc)) {
3046         return false;
3047     }
3048     dumpPreRoseGraph(vg, cc.grey, "post_ensure_rose.dot");
3049 
3050     /* Step 6: send to rose */
3051     bool rv = rose.addRose(vg, prefilter);
3052     DEBUG_PRINTF("violet: %s\n", rv ? "success" : "fail");
3053     return rv;
3054 }
3055 
checkViolet(const ReportManager & rm,const NGHolder & h,bool prefilter,const CompileContext & cc)3056 bool checkViolet(const ReportManager &rm, const NGHolder &h, bool prefilter,
3057                  const CompileContext &cc) {
3058     auto vg = doInitialVioletTransform(h, true, cc);
3059     if (num_vertices(vg) <= 2) {
3060         return false;
3061     }
3062 
3063     bool rv = roseCheckRose(vg, prefilter, rm, cc);
3064     DEBUG_PRINTF("violet: %s\n", rv ? "success" : "fail");
3065     return rv;
3066 }
3067 
3068 }
3069