1 /*
2  * Copyright (c) 2015-2017, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /** \file
30  * \brief Miscellaneous NFA graph utilities.
31  */
32 #include "ng_util.h"
33 
34 #include "grey.h"
35 #include "ng_dump.h"
36 #include "ng_prune.h"
37 #include "ue2common.h"
38 #include "nfa/limex_limits.h" // for NFA_MAX_TOP_MASKS.
39 #include "parser/position.h"
40 #include "util/graph_range.h"
41 #include "util/graph_small_color_map.h"
42 #include "util/make_unique.h"
43 #include "util/order_check.h"
44 #include "util/ue2string.h"
45 #include "util/report_manager.h"
46 
47 #include <limits>
48 #include <map>
49 #include <set>
50 #include <unordered_map>
51 #include <unordered_set>
52 
53 #include <boost/graph/filtered_graph.hpp>
54 #include <boost/graph/topological_sort.hpp>
55 #include <boost/range/adaptor/map.hpp>
56 
57 using namespace std;
58 using boost::make_filtered_graph;
59 using boost::make_assoc_property_map;
60 
61 namespace ue2 {
62 
getSoleDestVertex(const NGHolder & g,NFAVertex a)63 NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex a) {
64     assert(a != NGHolder::null_vertex());
65 
66     NGHolder::out_edge_iterator ii, iie;
67     tie(ii, iie) = out_edges(a, g);
68     if (ii == iie) {
69         return NGHolder::null_vertex();
70     }
71     NFAVertex b = target(*ii, g);
72     if (a == b) {
73         ++ii;
74         if (ii == iie) {
75             return NGHolder::null_vertex();
76         }
77 
78         b = target(*ii, g);
79         if (++ii != iie) {
80             return NGHolder::null_vertex();
81         }
82     } else if (++ii != iie && (target(*ii, g) != a || ++ii != iie)) {
83         return NGHolder::null_vertex();
84     }
85 
86     assert(a != b);
87     return b;
88 }
89 
getSoleSourceVertex(const NGHolder & g,NFAVertex a)90 NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex a) {
91     assert(a != NGHolder::null_vertex());
92 
93     u32 idegree = in_degree(a, g);
94     if (idegree != 1 && !(idegree == 2 && hasSelfLoop(a, g))) {
95         return NGHolder::null_vertex();
96     }
97 
98     NGHolder::in_edge_iterator ii, iie;
99     tie(ii, iie) = in_edges(a, g);
100     if (ii == iie) {
101         return NGHolder::null_vertex();
102     }
103     NFAVertex b = source(*ii, g);
104     if (a == b) {
105         ++ii;
106         if (ii == iie) {
107             return NGHolder::null_vertex();
108         }
109 
110         b = source(*ii, g);
111     }
112 
113     assert(a != b);
114     return b;
115 }
116 
clone_vertex(NGHolder & g,NFAVertex v)117 NFAVertex clone_vertex(NGHolder &g, NFAVertex v) {
118     NFAVertex clone = add_vertex(g);
119     u32 idx = g[clone].index;
120     g[clone] = g[v];
121     g[clone].index = idx;
122 
123     return clone;
124 }
125 
clone_out_edges(NGHolder & g,NFAVertex source,NFAVertex dest)126 void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest) {
127     for (const auto &e : out_edges_range(source, g)) {
128         NFAVertex t = target(e, g);
129         if (edge(dest, t, g).second) {
130             continue;
131         }
132         NFAEdge clone = add_edge(dest, t, g);
133         u32 idx = g[clone].index;
134         g[clone] = g[e];
135         g[clone].index = idx;
136     }
137 }
138 
clone_in_edges(NGHolder & g,NFAVertex s,NFAVertex dest)139 void clone_in_edges(NGHolder &g, NFAVertex s, NFAVertex dest) {
140     for (const auto &e : in_edges_range(s, g)) {
141         NFAVertex ss = source(e, g);
142         assert(!edge(ss, dest, g).second);
143         NFAEdge clone = add_edge(ss, dest, g);
144         u32 idx = g[clone].index;
145         g[clone] = g[e];
146         g[clone].index = idx;
147     }
148 }
149 
onlyOneTop(const NGHolder & g)150 bool onlyOneTop(const NGHolder &g) {
151     return getTops(g).size() == 1;
152 }
153 
154 namespace {
155 struct CycleFound {};
156 struct DetectCycles : public boost::default_dfs_visitor {
DetectCyclesue2::__anond22ca4f30111::DetectCycles157     explicit DetectCycles(const NGHolder &g) : startDs(g.startDs) {}
back_edgeue2::__anond22ca4f30111::DetectCycles158     void back_edge(const NFAEdge &e, const NGHolder &g) const {
159         NFAVertex u = source(e, g), v = target(e, g);
160         // We ignore the startDs self-loop.
161         if (u == startDs && v == startDs) {
162             return;
163         }
164         // Any other back-edge indicates a cycle.
165         DEBUG_PRINTF("back edge %zu->%zu found\n", g[u].index, g[v].index);
166         throw CycleFound();
167     }
168 private:
169     const NFAVertex startDs;
170 };
171 } // namespace
172 
isVacuous(const NGHolder & h)173 bool isVacuous(const NGHolder &h) {
174     return edge(h.start, h.accept, h).second
175         || edge(h.start, h.acceptEod, h).second
176         || edge(h.startDs, h.accept, h).second
177         || edge(h.startDs, h.acceptEod, h).second;
178 }
179 
isAnchored(const NGHolder & g)180 bool isAnchored(const NGHolder &g) {
181     for (auto v : adjacent_vertices_range(g.startDs, g)) {
182         if (v != g.startDs) {
183             return false;
184         }
185     }
186     return true;
187 }
188 
isFloating(const NGHolder & g)189 bool isFloating(const NGHolder &g) {
190     for (auto v : adjacent_vertices_range(g.start, g)) {
191         if (v != g.startDs && !edge(g.startDs, v, g).second) {
192             return false;
193         }
194     }
195     return true;
196 }
197 
isAcyclic(const NGHolder & g)198 bool isAcyclic(const NGHolder &g) {
199     try {
200         boost::depth_first_search(g, DetectCycles(g), make_small_color_map(g),
201                                   g.start);
202     } catch (const CycleFound &) {
203         return false;
204     }
205 
206     return true;
207 }
208 
209 /** True if the graph has a cycle reachable from the given source vertex. */
hasReachableCycle(const NGHolder & g,NFAVertex src)210 bool hasReachableCycle(const NGHolder &g, NFAVertex src) {
211     assert(hasCorrectlyNumberedVertices(g));
212 
213     try {
214         // Use depth_first_visit, rather than depth_first_search, so that we
215         // only search from src.
216         boost::depth_first_visit(g, src, DetectCycles(g),
217                                  make_small_color_map(g));
218     } catch (const CycleFound &) {
219         return true;
220     }
221 
222     return false;
223 }
224 
hasBigCycles(const NGHolder & g)225 bool hasBigCycles(const NGHolder &g) {
226     assert(hasCorrectlyNumberedVertices(g));
227     set<NFAEdge> dead;
228     BackEdges<set<NFAEdge>> backEdgeVisitor(dead);
229     boost::depth_first_search(g, backEdgeVisitor, make_small_color_map(g),
230                               g.start);
231 
232     for (const auto &e : dead) {
233         if (source(e, g) != target(e, g)) {
234             return true;
235         }
236     }
237 
238     return false;
239 }
240 
hasNarrowReachVertex(const NGHolder & g,size_t max_reach_count)241 bool hasNarrowReachVertex(const NGHolder &g, size_t max_reach_count) {
242     return any_of_in(vertices_range(g), [&](NFAVertex v) {
243         return !is_special(v, g) && g[v].char_reach.count() < max_reach_count;
244     });
245 }
246 
can_never_match(const NGHolder & g)247 bool can_never_match(const NGHolder &g) {
248     assert(edge(g.accept, g.acceptEod, g).second);
249     if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) {
250         DEBUG_PRINTF("no paths into accept\n");
251         return true;
252     }
253 
254     return false;
255 }
256 
can_match_at_eod(const NGHolder & h)257 bool can_match_at_eod(const NGHolder &h) {
258     if (in_degree(h.acceptEod, h) > 1) {
259         DEBUG_PRINTF("more than one edge to acceptEod\n");
260         return true;
261     }
262 
263     for (auto e : in_edges_range(h.accept, h)) {
264         if (h[e].assert_flags) {
265             DEBUG_PRINTF("edge to accept has assert flags %d\n",
266                          h[e].assert_flags);
267             return true;
268         }
269     }
270 
271     return false;
272 }
273 
can_only_match_at_eod(const NGHolder & g)274 bool can_only_match_at_eod(const NGHolder &g) {
275     NGHolder::in_edge_iterator ie, ee;
276     tie(ie, ee) = in_edges(g.accept, g);
277 
278     return ie == ee;
279 }
280 
matches_everywhere(const NGHolder & h)281 bool matches_everywhere(const NGHolder &h) {
282     NFAEdge e = edge(h.startDs, h.accept, h);
283 
284     return e && !h[e].assert_flags;
285 }
286 
is_virtual_start(NFAVertex v,const NGHolder & g)287 bool is_virtual_start(NFAVertex v, const NGHolder &g) {
288     return g[v].assert_flags & POS_FLAG_VIRTUAL_START;
289 }
290 
291 static
reorderSpecials(const NGHolder & g,vector<NFAVertex> & topoOrder)292 void reorderSpecials(const NGHolder &g, vector<NFAVertex> &topoOrder) {
293     // Start is last element of reverse topo ordering.
294     auto it = find(topoOrder.begin(), topoOrder.end(), g.start);
295     if (it != topoOrder.end() - 1) {
296         DEBUG_PRINTF("repositioning start\n");
297         assert(it != topoOrder.end());
298         topoOrder.erase(it);
299         topoOrder.insert(topoOrder.end(), g.start);
300     }
301 
302     // StartDs is second-to-last element of reverse topo ordering.
303     it = find(topoOrder.begin(), topoOrder.end(), g.startDs);
304     if (it != topoOrder.end() - 2) {
305         DEBUG_PRINTF("repositioning start ds\n");
306         assert(it != topoOrder.end());
307         topoOrder.erase(it);
308         topoOrder.insert(topoOrder.end() - 1, g.startDs);
309     }
310 
311     // AcceptEOD is first element of reverse topo ordering.
312     it = find(topoOrder.begin(), topoOrder.end(), g.acceptEod);
313     if (it != topoOrder.begin()) {
314         DEBUG_PRINTF("repositioning accept\n");
315         assert(it != topoOrder.end());
316         topoOrder.erase(it);
317         topoOrder.insert(topoOrder.begin(), g.acceptEod);
318     }
319 
320     // Accept is second element of reverse topo ordering, if it's connected.
321     it = find(topoOrder.begin(), topoOrder.end(), g.accept);
322     if (it != topoOrder.begin() + 1) {
323         DEBUG_PRINTF("repositioning accept\n");
324         assert(it != topoOrder.end());
325         topoOrder.erase(it);
326         if (in_degree(g.accept, g) != 0) {
327             topoOrder.insert(topoOrder.begin() + 1, g.accept);
328         }
329     }
330 }
331 
getTopoOrdering(const NGHolder & g)332 vector<NFAVertex> getTopoOrdering(const NGHolder &g) {
333     assert(hasCorrectlyNumberedVertices(g));
334 
335     // Use the same colour map for both DFS and topological_sort below: avoids
336     // having to reallocate it, etc.
337     auto colors = make_small_color_map(g);
338 
339     using EdgeSet = unordered_set<NFAEdge>;
340     EdgeSet backEdges;
341     BackEdges<EdgeSet> be(backEdges);
342 
343     depth_first_search(g, visitor(be).root_vertex(g.start).color_map(colors));
344 
345     auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&backEdges));
346 
347     vector<NFAVertex> ordering;
348     ordering.reserve(num_vertices(g));
349     topological_sort(acyclic_g, back_inserter(ordering), color_map(colors));
350 
351     reorderSpecials(g, ordering);
352 
353     return ordering;
354 }
355 
356 static
mustBeSetBefore_int(NFAVertex u,const NGHolder & g,decltype(make_small_color_map (NGHolder ()))& colors)357 void mustBeSetBefore_int(NFAVertex u, const NGHolder &g,
358                          decltype(make_small_color_map(NGHolder())) &colors) {
359     set<NFAVertex> s;
360     insert(&s, adjacent_vertices(u, g));
361 
362     set<NFAEdge> dead; // Edges leading to u or u's successors.
363 
364     for (auto v : inv_adjacent_vertices_range(u, g)) {
365         for (const auto &e : out_edges_range(v, g)) {
366             NFAVertex t = target(e, g);
367             if (t == u || contains(s, t)) {
368                 dead.insert(e);
369             }
370         }
371     }
372 
373     auto prefix = make_filtered_graph(g, make_bad_edge_filter(&dead));
374 
375     depth_first_visit(prefix, g.start, make_dfs_visitor(boost::null_visitor()),
376                       colors);
377 }
378 
mustBeSetBefore(NFAVertex u,NFAVertex v,const NGHolder & g,mbsb_cache & cache)379 bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g,
380                      mbsb_cache &cache) {
381     assert(&cache.g == &g);
382     auto key = make_pair(g[u].index, g[v].index);
383     DEBUG_PRINTF("cache checking (%zu)\n", cache.cache.size());
384     if (contains(cache.cache, key)) {
385         DEBUG_PRINTF("cache hit\n");
386         return cache.cache[key];
387     }
388 
389     auto colors = make_small_color_map(g);
390     mustBeSetBefore_int(u, g, colors);
391 
392     for (auto vi : vertices_range(g)) {
393         auto key2 = make_pair(g[u].index, g[vi].index);
394         DEBUG_PRINTF("adding %zu %zu\n", key2.first, key2.second);
395         assert(!contains(cache.cache, key2));
396         bool value = get(colors, vi) == small_color::white;
397         cache.cache[key2] = value;
398         assert(contains(cache.cache, key2));
399     }
400     DEBUG_PRINTF("cache miss %zu %zu (%zu)\n", key.first, key.second,
401                  cache.cache.size());
402     return cache.cache[key];
403 }
404 
appendLiteral(NGHolder & h,const ue2_literal & s)405 void appendLiteral(NGHolder &h, const ue2_literal &s) {
406     DEBUG_PRINTF("adding '%s' to graph\n", dumpString(s).c_str());
407     vector<NFAVertex> tail;
408     assert(in_degree(h.acceptEod, h) == 1);
409     for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
410         tail.push_back(v);
411     }
412     assert(!tail.empty());
413 
414     for (auto v : tail) {
415         remove_edge(v, h.accept, h);
416     }
417 
418     for (const auto &c : s) {
419         NFAVertex v = add_vertex(h);
420         h[v].char_reach = c;
421         for (auto u : tail) {
422             add_edge(u, v, h);
423         }
424         tail.clear();
425         tail.push_back(v);
426     }
427 
428     for (auto v : tail) {
429         add_edge(v, h.accept, h);
430     }
431 }
432 
getTops(const NGHolder & h)433 flat_set<u32> getTops(const NGHolder &h) {
434     flat_set<u32> tops;
435     for (const auto &e : out_edges_range(h.start, h)) {
436         insert(&tops, h[e].tops);
437     }
438     return tops;
439 }
440 
setTops(NGHolder & h,u32 top)441 void setTops(NGHolder &h, u32 top) {
442     for (const auto &e : out_edges_range(h.start, h)) {
443         assert(h[e].tops.empty());
444         if (target(e, h) == h.startDs) {
445             continue;
446         }
447         h[e].tops.insert(top);
448     }
449 }
450 
clearReports(NGHolder & g)451 void clearReports(NGHolder &g) {
452     DEBUG_PRINTF("clearing reports without an accept edge\n");
453     unordered_set<NFAVertex> allow;
454     insert(&allow, inv_adjacent_vertices(g.accept, g));
455     insert(&allow, inv_adjacent_vertices(g.acceptEod, g));
456     allow.erase(g.accept); // due to stylised edge.
457 
458     for (auto v : vertices_range(g)) {
459         if (contains(allow, v)) {
460             continue;
461         }
462         g[v].reports.clear();
463     }
464 }
465 
duplicateReport(NGHolder & g,ReportID r_old,ReportID r_new)466 void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new) {
467     for (auto v : vertices_range(g)) {
468         auto &reports = g[v].reports;
469         if (contains(reports, r_old)) {
470             reports.insert(r_new);
471         }
472     }
473 }
474 
475 static
fillHolderOutEdges(NGHolder & out,const NGHolder & in,const unordered_map<NFAVertex,NFAVertex> & v_map,NFAVertex u)476 void fillHolderOutEdges(NGHolder &out, const NGHolder &in,
477                         const unordered_map<NFAVertex, NFAVertex> &v_map,
478                         NFAVertex u) {
479     NFAVertex u_new = v_map.at(u);
480 
481     for (auto e : out_edges_range(u, in)) {
482         NFAVertex v = target(e, in);
483 
484         if (is_special(u, in) && is_special(v, in)) {
485             continue;
486         }
487 
488         auto it = v_map.find(v);
489         if (it == v_map.end()) {
490             continue;
491         }
492         NFAVertex v_new = it->second;
493         assert(!edge(u_new, v_new, out).second);
494         add_edge(u_new, v_new, in[e], out);
495     }
496 }
497 
fillHolder(NGHolder * outp,const NGHolder & in,const deque<NFAVertex> & vv,unordered_map<NFAVertex,NFAVertex> * v_map_out)498 void fillHolder(NGHolder *outp, const NGHolder &in, const deque<NFAVertex> &vv,
499                 unordered_map<NFAVertex, NFAVertex> *v_map_out) {
500     NGHolder &out = *outp;
501     unordered_map<NFAVertex, NFAVertex> &v_map = *v_map_out;
502 
503     out.kind = in.kind;
504 
505     for (auto v : vv) {
506         if (is_special(v, in)) {
507             continue;
508         }
509         v_map[v] = add_vertex(in[v], out);
510     }
511 
512     for (u32 i = 0; i < N_SPECIALS; i++) {
513         v_map[in.getSpecialVertex(i)] = out.getSpecialVertex(i);
514     }
515 
516     DEBUG_PRINTF("copied %zu vertices to NG graph\n", v_map.size());
517 
518     fillHolderOutEdges(out, in, v_map, in.start);
519     fillHolderOutEdges(out, in, v_map, in.startDs);
520 
521     for (auto u : vv) {
522         if (is_special(u, in)) {
523             continue;
524         }
525         fillHolderOutEdges(out, in, v_map, u);
526     }
527 
528     renumber_edges(out);
529     renumber_vertices(out);
530 }
531 
cloneHolder(NGHolder & out,const NGHolder & in)532 void cloneHolder(NGHolder &out, const NGHolder &in) {
533     assert(hasCorrectlyNumberedVertices(in));
534     assert(hasCorrectlyNumberedVertices(out));
535     out.kind = in.kind;
536 
537     // Note: depending on the state of the input graph, some stylized edges
538     // (e.g. start->startDs) may not exist. This must be propagated to the
539     // output graph as well.
540 
541     /* remove the existing special edges */
542     clear_vertex(out.startDs, out);
543     clear_vertex(out.accept, out);
544     renumber_edges(out);
545 
546     vector<NFAVertex> out_mapping(num_vertices(in));
547     out_mapping[NODE_START] = out.start;
548     out_mapping[NODE_START_DOTSTAR] = out.startDs;
549     out_mapping[NODE_ACCEPT] = out.accept;
550     out_mapping[NODE_ACCEPT_EOD] = out.acceptEod;
551 
552     for (auto v : vertices_range(in)) {
553         u32 i = in[v].index;
554 
555         /* special vertices are already in the out graph */
556         if (i >= N_SPECIALS) {
557             assert(!out_mapping[i]);
558             out_mapping[i] = add_vertex(in[v], out);
559         }
560 
561         out[out_mapping[i]] = in[v];
562     }
563 
564     for (auto e : edges_range(in)) {
565         u32 si = in[source(e, in)].index;
566         u32 ti = in[target(e, in)].index;
567 
568         DEBUG_PRINTF("adding edge %u->%u\n", si, ti);
569 
570         NFAVertex s = out_mapping[si];
571         NFAVertex t = out_mapping[ti];
572         NFAEdge e2 = add_edge(s, t, out);
573         out[e2] = in[e];
574     }
575 
576     // Safety checks.
577     assert(num_vertices(in) == num_vertices(out));
578     assert(num_edges(in) == num_edges(out));
579     assert(hasCorrectlyNumberedVertices(out));
580 }
581 
cloneHolder(NGHolder & out,const NGHolder & in,unordered_map<NFAVertex,NFAVertex> * mapping)582 void cloneHolder(NGHolder &out, const NGHolder &in,
583                  unordered_map<NFAVertex, NFAVertex> *mapping) {
584     cloneHolder(out, in);
585     vector<NFAVertex> out_verts(num_vertices(in));
586     for (auto v : vertices_range(out)) {
587         out_verts[out[v].index] = v;
588     }
589 
590     mapping->clear();
591 
592     for (auto v : vertices_range(in)) {
593         (*mapping)[v] = out_verts[in[v].index];
594         assert((*mapping)[v]);
595     }
596 }
597 
cloneHolder(const NGHolder & in)598 unique_ptr<NGHolder> cloneHolder(const NGHolder &in) {
599     unique_ptr<NGHolder> h = ue2::make_unique<NGHolder>();
600     cloneHolder(*h, in);
601     return h;
602 }
603 
reverseHolder(const NGHolder & g_in,NGHolder & g)604 void reverseHolder(const NGHolder &g_in, NGHolder &g) {
605     // Make the BGL do the grunt work.
606     unordered_map<NFAVertex, NFAVertex> vertexMap;
607     boost::transpose_graph(g_in, g,
608                 orig_to_copy(boost::make_assoc_property_map(vertexMap)));
609 
610     // The transpose_graph operation will have created extra copies of our
611     // specials. We have to rewire their neighbours to the 'real' specials and
612     // delete them.
613     NFAVertex start = vertexMap[g_in.acceptEod];
614     NFAVertex startDs = vertexMap[g_in.accept];
615     NFAVertex accept = vertexMap[g_in.startDs];
616     NFAVertex acceptEod = vertexMap[g_in.start];
617 
618     // Successors of starts.
619     for (const auto &e : out_edges_range(start, g)) {
620         NFAVertex v = target(e, g);
621         add_edge(g.start, v, g[e], g);
622     }
623     for (const auto &e : out_edges_range(startDs, g)) {
624         NFAVertex v = target(e, g);
625         add_edge(g.startDs, v, g[e], g);
626     }
627 
628     // Predecessors of accepts.
629     for (const auto &e : in_edges_range(accept, g)) {
630         NFAVertex u = source(e, g);
631         add_edge(u, g.accept, g[e], g);
632     }
633     for (const auto &e : in_edges_range(acceptEod, g)) {
634         NFAVertex u = source(e, g);
635         add_edge(u, g.acceptEod, g[e], g);
636     }
637 
638     // Remove our impostors.
639     clear_vertex(start, g);
640     remove_vertex(start, g);
641     clear_vertex(startDs, g);
642     remove_vertex(startDs, g);
643     clear_vertex(accept, g);
644     remove_vertex(accept, g);
645     clear_vertex(acceptEod, g);
646     remove_vertex(acceptEod, g);
647 
648     // Renumber so that g's properties (number of vertices, edges) are
649     // accurate.
650     renumber_vertices(g);
651     renumber_edges(g);
652 
653     assert(num_vertices(g) == num_vertices(g_in));
654     assert(num_edges(g) == num_edges(g_in));
655 }
656 
removeTrailingLiteralStates(NGHolder & g,const ue2_literal & lit,u32 max_delay,bool overhang_ok)657 u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
658                                 u32 max_delay, bool overhang_ok) {
659     assert(isCorrectlyTopped(g));
660     if (max_delay == numeric_limits<u32>::max()) {
661         max_delay--;
662     }
663 
664     DEBUG_PRINTF("killing off '%s'\n", dumpString(lit).c_str());
665     set<NFAVertex> curr, next;
666     curr.insert(g.accept);
667 
668     auto it = lit.rbegin();
669     for (u32 delay = max_delay; delay > 0 && it != lit.rend(); delay--, ++it) {
670         next.clear();
671         for (auto v : curr) {
672             for (auto u : inv_adjacent_vertices_range(v, g)) {
673                 if (u == g.start) {
674                     if (overhang_ok) {
675                         DEBUG_PRINTF("bail\n");
676                         goto bail; /* things got complicated */
677                     } else {
678                         continue; /* it is not possible for a lhs literal to
679                                    * overhang the start */
680                     }
681                 }
682 
683                 const CharReach &cr = g[u].char_reach;
684                 if (!overlaps(*it, cr)) {
685                     DEBUG_PRINTF("skip\n");
686                     continue;
687                 }
688                 if (isSubsetOf(*it, cr)) {
689                     next.insert(u);
690                 } else {
691                     DEBUG_PRINTF("bail\n");
692                     goto bail; /* things got complicated */
693                 }
694             }
695         }
696 
697         curr.swap(next);
698     }
699  bail:
700     if (curr.empty()) {
701         /* This can happen when we have an edge representing a cross from two
702          * sides of an alternation. This whole edge needs to be marked as
703          * dead */
704         assert(0); /* should have been picked up by can match */
705         return numeric_limits<u32>::max();
706     }
707 
708     u32 delay = distance(lit.rbegin(), it);
709     assert(delay <= max_delay);
710     assert(delay <= lit.length());
711     DEBUG_PRINTF("managed delay %u (of max %u)\n", delay, max_delay);
712 
713     set<NFAVertex> pred;
714     for (auto v : curr) {
715         insert(&pred, inv_adjacent_vertices_range(v, g));
716     }
717 
718     clear_in_edges(g.accept, g);
719     clearReports(g);
720 
721     for (auto v : pred) {
722         NFAEdge e = add_edge(v, g.accept, g);
723         g[v].reports.insert(0);
724         if (is_triggered(g) && v == g.start) {
725             g[e].tops.insert(DEFAULT_TOP);
726         }
727     }
728 
729     pruneUseless(g);
730     assert(allMatchStatesHaveReports(g));
731     assert(isCorrectlyTopped(g));
732 
733     DEBUG_PRINTF("graph has %zu vertices left\n", num_vertices(g));
734     return delay;
735 }
736 
737 #ifndef NDEBUG
738 
allMatchStatesHaveReports(const NGHolder & g)739 bool allMatchStatesHaveReports(const NGHolder &g) {
740     unordered_set<NFAVertex> reporters;
741     for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
742         if (g[v].reports.empty()) {
743             DEBUG_PRINTF("vertex %zu has no reports!\n", g[v].index);
744             return false;
745         }
746         reporters.insert(v);
747     }
748 
749     for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
750         if (v == g.accept) {
751             continue; // stylised edge
752         }
753         if (g[v].reports.empty()) {
754             DEBUG_PRINTF("vertex %zu has no reports!\n", g[v].index);
755             return false;
756         }
757         reporters.insert(v);
758     }
759 
760     for (auto v : vertices_range(g)) {
761         if (!contains(reporters, v) && !g[v].reports.empty()) {
762             DEBUG_PRINTF("vertex %zu is not a match state, but has reports!\n",
763                          g[v].index);
764             return false;
765         }
766     }
767 
768     return true;
769 }
770 
isCorrectlyTopped(const NGHolder & g)771 bool isCorrectlyTopped(const NGHolder &g) {
772     if (is_triggered(g)) {
773         for (const auto &e : out_edges_range(g.start, g)) {
774             if (g[e].tops.empty() != (target(e, g) == g.startDs)) {
775                 return false;
776             }
777         }
778     } else {
779         for (const auto &e : out_edges_range(g.start, g)) {
780             if (!g[e].tops.empty()) {
781                 return false;
782             }
783         }
784     }
785 
786     return true;
787 }
788 
789 #endif // NDEBUG
790 
791 } // namespace ue2
792