1 /*
2  * Copyright (c) 2015-2017, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /** \file
30  * \brief NFA graph reductions.
31  *
32  * This code attempts to make the NFA graph smaller by performing a number of
33  * local transformations:
34  *
35  * ### (1) removal of redundant vertices:
36  *
37  *      v is redundant wrt to u if succ(v) is a subset of succ(u)
38  *                            AND pred(v) is a subset of pred(u)
39  *                            AND cr(v) is a subset of cr(u)
40  *
41  * ### (2) 'diamond' transformation:
42  *
43  *      given succ(v) == succ(u) and pred(v) == pred(u),
44  *      v and u can be replaced by w with succ(w) = succ(v), pred(w) = pred(v),
45  *                                   and cr(w) = union(cr(v), cr(u))
46  *
47  * ### (3) locally identifiable left equivalence:
48  *
49  *     given pred(v) == pred(u) (**) and cr(v) == cr(u),
50  *     v and u can be replaced by w with pred(w) = pred(v), cr(w) = cr(v),
51  *                                  and succ(w) = union(succ(v), succ(u))
52  *
53  * ### (4) locally identifiable right equivalence:
54  *
55  *     given succ(v) == succ(u) (**) and cr(v) == cr(u),
56  *     v and u can be replaced by w with succ(w) = succ(v), cr(w) = cr(v),
57  *                                  and pred(w) = union(pred(v), pred(u))
58  *
59  * NOTE (**): for left and right equivalence, we can also do the transform if
60  * set(u) contains u, set(v) contains v and the sets are otherwise equal. This
61  * enables equivalent vertices with self-loops to be merged.
62  *
63  * If v and u raise accepts, they can only be merged if they raise the same
64  * report IDs.
65  *
66  * Transformations are applied repeatedly until the graph stops changing.
67  *
68  * Note that the final graph may depend on the order in which these
69  * transformations are applied. In order to reduce the non-determinism the
70  * following order is imposed: (1); (2); (3) + (4).
71  */
72 #include "ng_redundancy.h"
73 
74 #include "ng_holder.h"
75 #include "ng_calc_components.h"
76 #include "ng_dominators.h"
77 #include "ng_prune.h"
78 #include "ng_util.h"
79 #include "ue2common.h"
80 #include "util/container.h"
81 #include "util/flat_containers.h"
82 #include "util/graph_range.h"
83 
84 #include <algorithm>
85 #include <cassert>
86 #include <map>
87 #include <set>
88 #include <vector>
89 
90 #include <boost/graph/depth_first_search.hpp>
91 #include <boost/graph/reverse_graph.hpp>
92 
93 using namespace std;
94 
95 namespace ue2 {
96 
97 namespace {
98 
99 /** Precalculated (and maintained) information about a vertex. */
100 class VertexInfo {
101 public:
102     flat_set<NFAVertex> pred; //!< predecessors of this vertex
103     flat_set<NFAVertex> succ; //!< successors of this vertex
104     bool isAccept = false;    //!< does this vertex lead to accept?
105     bool isRemoved = false;   //!< have we already removed this vertex?
106 
inDegree() const107     size_t inDegree() const { return pred.size(); }
outDegree() const108     size_t outDegree() const { return succ.size(); }
109 };
110 
111 class VertexInfoMap {
112 public:
VertexInfoMap(const NGHolder & gg)113     explicit VertexInfoMap(const NGHolder &gg)
114         : g(gg), infos(num_vertices(gg)) {}
operator [](NFAVertex v)115     VertexInfo &operator[](NFAVertex v) {
116         u32 i = g[v].index;
117         assert(i < infos.size());
118         return infos[i];
119     }
120 
operator [](NFAVertex v) const121     const VertexInfo &operator[](NFAVertex v) const {
122         u32 i = g[v].index;
123         assert(i < infos.size());
124         return infos[i];
125     }
126 
127 private:
128     const NGHolder &g;
129     vector<VertexInfo> infos;
130 };
131 
132 } // namespace
133 
134 /** Populates the info map with their predecessor and successor states, and
135  * whether they are accept states. */
136 static
populateContainers(const NGHolder & g,VertexInfoMap & infoMap)137 void populateContainers(const NGHolder &g, VertexInfoMap &infoMap) {
138     for (auto v : vertices_range(g)) {
139         VertexInfo &info = infoMap[v];
140         assert(info.pred.empty() && info.succ.empty());
141 
142         // Build successor and predecessor sets
143         insert(&info.pred, inv_adjacent_vertices(v, g));
144         insert(&info.succ, adjacent_vertices(v, g));
145 
146         // Note whether the vertex is an accept state
147         if (!is_special(v, g)) {
148             if (contains(info.succ, g.accept)
149                 || contains(info.succ, g.acceptEod)) {
150                 info.isAccept = true;
151             }
152         }
153     }
154 }
155 
156 /** Helper function to take the intersection of two sorted vertex sets
157  * in-place. */
158 static
inplaceIntersection(vector<NFAVertex> & vset1,const flat_set<NFAVertex> & vset2)159 void inplaceIntersection(vector<NFAVertex> &vset1,
160                          const flat_set<NFAVertex> &vset2) {
161     const NFAVertex GONE = NGHolder::null_vertex();
162 
163     vector<NFAVertex>::iterator it = vset1.begin(), ite = vset1.end();
164     flat_set<NFAVertex>::const_iterator jt = vset2.begin(), jte = vset2.end();
165 
166     while ((it != ite) && (jt != jte)) {
167         assert(*it != GONE);
168 
169         if (*it < *jt) {
170             // present in vset1 but not in vset2. Set to null, remove in a
171             // second pass.
172             *it = GONE;
173             ++it;
174         } else if (*jt < *it) {
175             // present in vset2 but not in vset1, skip.
176             ++jt;
177         } else {
178             // present in both sets.
179             ++it; ++jt;
180         }
181     }
182 
183     // Left overs are only in that set.
184     vset1.erase(it, ite);
185 
186     // Remove nulls created above.
187     vset1.erase(remove(vset1.begin(), vset1.end(), GONE), vset1.end());
188 }
189 
190 /** Find the intersection of the successors of our predecessors. */
191 static
succPredIntersection(const NFAVertex v,const flat_set<NFAVertex> & predSet,const VertexInfoMap & infoMap,vector<NFAVertex> & intersection,bool considerSelf=true)192 void succPredIntersection(const NFAVertex v, const flat_set<NFAVertex> &predSet,
193                           const VertexInfoMap &infoMap,
194                           vector<NFAVertex> &intersection,
195                           bool considerSelf = true /* follow self loops */) {
196     /* find a good seed for the intersection */
197     const flat_set<NFAVertex> *best = nullptr;
198     for (auto u : predSet) {
199         if (!considerSelf && u == v) {
200             continue;
201         }
202 
203         const flat_set<NFAVertex> &succSet = infoMap[u].succ;
204         if (!best || succSet.size() <= best->size()) {
205             best = &succSet;
206 
207             // Break out if we've reduced our intersection to [v]
208             if (best->size() == 1) {
209                 assert(*(best->begin()) == v);
210                 intersection.push_back(v);
211                 return;
212             }
213         }
214     }
215 
216     if (best) {
217         insert(&intersection, intersection.end(), *best);
218     }
219 
220     for (auto u : predSet) {
221         if (!considerSelf && u == v) {
222             continue;
223         }
224 
225         inplaceIntersection(intersection, infoMap[u].succ);
226 
227         // Check: intersection should always be at least size 1
228         assert(!intersection.empty());
229 
230         // Break out if we've reduced our intersection to [v]
231         if (intersection.size() == 1) {
232             assert(*intersection.begin() == v);
233             return;
234         }
235     }
236 }
237 
238 /** Find the intersection of the predecessors of our successors. */
239 static
predSuccIntersection(const NFAVertex v,const flat_set<NFAVertex> & succSet,const VertexInfoMap & infoMap,vector<NFAVertex> & intersection,bool considerSelf=true)240 void predSuccIntersection(const NFAVertex v,
241                           const flat_set<NFAVertex> &succSet,
242                           const VertexInfoMap &infoMap,
243                           vector<NFAVertex> &intersection,
244                           bool considerSelf = true /* follow self loops */) {
245     /* find a good seed for the intersection */
246     const flat_set<NFAVertex> *best = nullptr;
247     for (auto w : succSet) {
248         if (!considerSelf && w == v) {
249             continue;
250         }
251 
252         const flat_set<NFAVertex> &predSet = infoMap[w].pred;
253         if (!best || predSet.size() <= best->size()) {
254             best = &predSet;
255 
256             // Break out if we've reduced our intersection to [v]
257             if (best->size() == 1) {
258                 assert(*(best->begin()) == v);
259                 intersection.push_back(v);
260                 return;
261             }
262         }
263     }
264 
265     if (best) {
266         insert(&intersection, intersection.end(), *best);
267     }
268 
269     for (auto w : succSet) {
270         if (!considerSelf && w == v) {
271             continue;
272         }
273 
274         inplaceIntersection(intersection, infoMap[w].pred);
275 
276         // Check: intersection should always be at least size 1
277         assert(!intersection.empty());
278 
279         // Break out if we've reduced our intersection to [v]
280         if (intersection.size() == 1) {
281             assert(*intersection.begin() == v);
282             return;
283         }
284     }
285 }
286 
287 /** Update containers to take into account the removal of vertex v. */
288 static
markForRemoval(const NFAVertex v,VertexInfoMap & infoMap,set<NFAVertex> & removable)289 void markForRemoval(const NFAVertex v, VertexInfoMap &infoMap,
290                     set<NFAVertex> &removable) {
291     VertexInfo &info = infoMap[v];
292     assert(!info.isRemoved);
293     assert(!contains(removable, v));
294     info.isRemoved = true;
295     removable.insert(v);
296 
297     // remove v from its predecessors' successors
298     for (auto u : info.pred) {
299         infoMap[u].succ.erase(v);
300     }
301 
302     // remove v from its successors' predecessors
303     for (auto w : info.succ) {
304         infoMap[w].pred.erase(v);
305     }
306 }
307 
308 static
hasInEdgeTops(const NGHolder & g,NFAVertex v)309 bool hasInEdgeTops(const NGHolder &g, NFAVertex v) {
310     NFAEdge e = edge(g.start, v, g);
311     return e && !g[e].tops.empty();
312 }
313 
314 /** Transform (1), removal of redundant vertices. */
315 static
doUselessMergePass(NGHolder & g,som_type som,VertexInfoMap & infoMap,set<NFAVertex> & removable)316 bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
317                         set<NFAVertex> &removable) {
318     /* useless merges can be done in any order, no need to take any care with
319      * ordering */
320 
321     // Temporary vectors used for intersections below
322     vector<NFAVertex> succPredSet, predSuccSet, intersection;
323 
324     bool changed = false;
325     for (auto v : vertices_range(g)) {
326         VertexInfo &info = infoMap[v];
327 
328         if (info.isRemoved) {
329             continue;
330         }
331 
332         assert(!contains(removable, v));
333 
334         if (is_special(v, g)) {
335             continue;
336         }
337 
338         /* we do not need to check for out edge tops - as only specials (start)
339          * can have tops and they are already disqualified. */
340         if (hasInEdgeTops(g, v)) {
341             continue; // Conservatively skip anything with nonzero tops.
342         }
343 
344         if (info.pred.empty() || info.succ.empty()) {
345             DEBUG_PRINTF("vertex %zu has empty pred/succ list\n", g[v].index);
346             assert(0); // non-special states should always have succ/pred lists
347             continue;
348         }
349 
350         // The following cases are more complex and rely on the intersection of
351         // Succ(Pred(v)) and Pred(Succ(v))
352 
353         // Compute intersections, operating on the smaller set first
354         // Note that we use vectors here, as set_intersection underneath
355         // guarantees sorted output, and vectors were quite a bit
356         // faster than sets or lists.
357 
358         succPredSet.clear();
359         predSuccSet.clear();
360 
361         if (info.pred.size() <= info.succ.size()) {
362             succPredIntersection(v, info.pred, infoMap, succPredSet);
363             if (succPredSet.size() == 1) {
364                 // nobody in here but us chickens
365                 assert(*succPredSet.begin() == v);
366                 continue;
367             }
368             predSuccIntersection(v, info.succ, infoMap, predSuccSet);
369             if (predSuccSet.size() == 1) {
370                 assert(*predSuccSet.begin() == v);
371                 continue;
372             }
373         } else {
374             predSuccIntersection(v, info.succ, infoMap, predSuccSet);
375             if (predSuccSet.size() == 1) {
376                 assert(*predSuccSet.begin() == v);
377                 continue;
378             }
379             succPredIntersection(v, info.pred, infoMap, succPredSet);
380             if (succPredSet.size() == 1) {
381                 assert(*succPredSet.begin() == v);
382                 continue;
383             }
384         }
385 
386         // Find the intersection of Succ(Pred(v)) and Pred(Succ(v))
387         intersection.clear();
388         set_intersection(succPredSet.begin(), succPredSet.end(),
389                          predSuccSet.begin(), predSuccSet.end(),
390                          back_inserter(intersection));
391 
392         /* Boring if it is just us in the intersection */
393         if (intersection.size() < 2) {
394             continue;
395         }
396 
397         // Compare char_reach, mark v for removal if any members of
398         // the intersection have an equal or greater reach
399         const CharReach &currReach = g[v].char_reach;
400         const auto &currReports = g[v].reports;
401         for (auto t : intersection) {
402             const VertexInfo &info2 = infoMap[t];
403 
404             /* start is never a succ of a state, so will never be in the
405              * predsucc/succpred intersection */
406             assert(t != g.start);
407 
408             if (t == v || info2.isRemoved) {
409                 continue;
410             }
411 
412             // For each candidate C to make V redundant, check:
413             // if V is an accept state, C must be an accept state for
414             //      the same pattern
415             // pred(C) is a superset of pred(V)
416             // succ(C) is a superset of succ(V)
417             // reach(C) is a superset of reach(V)
418             //
419             // Note: pred/sec tests are covered by the intersections
420             // calculated above.
421 
422             /* note: links to accepts are also tracked in succs */
423             if (info.isAccept && currReports != g[t].reports) {
424                 continue;
425             }
426 
427             if (som) {
428                 if (t == g.startDs) {
429                     continue;
430                 }
431                 if (is_virtual_start(t, g) != is_virtual_start(v, g)) {
432                     continue;
433                 }
434             }
435 
436             /* we do not need to check for out edge tops - as only start
437              * can have tops and it has already been ruled out. */
438             if (hasInEdgeTops(g, t)) {
439                 continue; // Conservatively skip anything with nonzero tops.
440             }
441 
442             CharReach &otherReach = g[t].char_reach;
443             if (currReach.isSubsetOf(otherReach)) {
444                 DEBUG_PRINTF("removing redundant vertex %zu (keeping %zu)\n",
445                              g[v].index, g[t].index);
446                 markForRemoval(v, infoMap, removable);
447                 changed = true;
448                 break;
449             }
450         }
451     }
452 
453     return changed;
454 }
455 
456 /** Transform (2), diamond merge pass. */
457 static
doDiamondMergePass(NGHolder & g,som_type som,VertexInfoMap & infoMap,set<NFAVertex> & removable)458 bool doDiamondMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
459                         set<NFAVertex> &removable) {
460     // Temporary vectors used for intersections below
461     vector<NFAVertex> succPredSet, predSuccSet, intersection;
462 
463     bool changed = false;
464     for (auto v : vertices_range(g)) {
465         VertexInfo &info = infoMap[v];
466 
467         if (info.isRemoved) {
468             continue;
469         }
470 
471         assert(!contains(removable, v));
472 
473         if (is_special(v, g)) {
474             continue;
475         }
476 
477         /* we do not need to check for out edge tops - as only specials (start)
478          * can have tops and they are already disqualified. */
479         if (hasInEdgeTops(g, v)) {
480             continue; // Conservatively skip anything with nonzero tops.
481         }
482 
483         if (info.pred.empty() || info.succ.empty()) {
484             assert(0); // non-special states should always have succ/pred lists
485             continue;
486         }
487 
488         // The following cases are more complex and rely on the intersection of
489         // Succ(Pred(v)) and Pred(Succ(v))
490 
491         // Compute intersections, operating on the smaller set first
492         // Note that we use vectors here, as set_intersection underneath
493         // guarantees sorted output, and vectors were quite a bit faster than
494         // sets or lists.
495 
496         succPredSet.clear();
497         predSuccSet.clear();
498 
499         if (info.pred.size() <= info.succ.size()) {
500             succPredIntersection(v, info.pred, infoMap, succPredSet);
501             if (succPredSet.size() == 1) {
502                 // nobody in here but us chickens
503                 assert(*succPredSet.begin() == v);
504                 continue;
505             }
506             predSuccIntersection(v, info.succ, infoMap, predSuccSet);
507             if (predSuccSet.size() == 1) {
508                 assert(*predSuccSet.begin() == v);
509                 continue;
510             }
511         } else {
512             predSuccIntersection(v, info.succ, infoMap, predSuccSet);
513             if (predSuccSet.size() == 1) {
514                 assert(*predSuccSet.begin() == v);
515                 continue;
516             }
517             succPredIntersection(v, info.pred, infoMap, succPredSet);
518             if (succPredSet.size() == 1) {
519                 assert(*succPredSet.begin() == v);
520                 continue;
521             }
522         }
523 
524         // Find the intersection of Succ(Pred(v)) and Pred(Succ(v))
525         intersection.clear();
526         set_intersection(succPredSet.begin(), succPredSet.end(),
527                          predSuccSet.begin(), predSuccSet.end(),
528                          back_inserter(intersection));
529 
530         /* Boring if it is just us in the intersection */
531         if (intersection.size() < 2) {
532             continue;
533         }
534 
535         const CharReach &currReach = g[v].char_reach;
536         const auto &currReports = g[v].reports;
537         for (auto t : intersection) {
538             const VertexInfo &info2 = infoMap[t];
539 
540             if (t == v || info2.isRemoved || is_special(t, g)) {
541                 continue;
542             }
543 
544             /* note: links to accepts are also tracked in succs */
545             if (info.isAccept && currReports != g[t].reports) {
546                 continue;
547             }
548 
549             /* we do not need to check for out edge tops - as only specials
550              * (start) can have tops and they are already disqualified. */
551             if (hasInEdgeTops(g, t)) {
552                 continue; // Conservatively skip anything with nonzero tops.
553             }
554 
555             if (som) {
556                 if (is_virtual_start(v, g) != is_virtual_start(t, g)) {
557                     continue; // can only merge like with like.
558                 }
559             }
560 
561             // If in-degree of v == in-degree of target
562             // and out-degree of v == out-degree of target
563             //    (because pred and succ are supersets)
564             // then combine charreach of v into target and remove v
565             if (info.inDegree() == info2.inDegree()
566                 && info.outDegree() == info2.outDegree()) {
567                 // add character reachability of v into target
568                 CharReach &otherReach = g[t].char_reach;
569                 otherReach |= currReach;
570                 // v can be removed
571                 DEBUG_PRINTF("removing redundant vertex %zu and merging "
572                              "reachability with vertex %zu\n",
573                              g[v].index, g[t].index);
574                 markForRemoval(v, infoMap, removable);
575                 changed = true;
576                 break;
577             }
578         }
579     }
580 
581     return changed;
582 }
583 
584 namespace {
585 
586 struct ReachMismatch {};
587 
588 class ReachSubsetVisitor : public boost::default_dfs_visitor {
589 public:
ReachSubsetVisitor(const CharReach & r)590     explicit ReachSubsetVisitor(const CharReach &r) : cr(r) {}
591 
592     template <class Graph, class Vertex>
discover_vertex(const Vertex & v,const Graph & g) const593     void discover_vertex(const Vertex &v, const Graph &g) const {
594         if (is_any_start(v, g)) {
595             return; // start vertices are OK
596         } else if (is_special(v, g)) {
597             assert(0);
598             throw ReachMismatch(); // other special nodes??
599         }
600 
601         const CharReach &vcr = g[v].char_reach;
602         DEBUG_PRINTF("checking if vcr (%zu) is subset of (%zu)\n", vcr.count(),
603                      cr.count());
604         if (vcr != (vcr & cr)) {
605             throw ReachMismatch();
606         }
607     }
608 
609 private:
610     const CharReach &cr;
611 };
612 
613 /** Terminator function for DFS used in pathReachSubset. */
614 template <class Graph, class Vertex> class VertexIs {
615 public:
VertexIs(const Vertex & v)616     explicit VertexIs(const Vertex &v) : vertex(v) {}
operator ()(const Vertex & v,const Graph &) const617     bool operator()(const Vertex &v, const Graph &) const {
618         return v == vertex;
619     }
620 
621 private:
622     Vertex vertex;
623 };
624 
625 } // namespace
626 
627 /** Returns true if every vertex on paths leading to edge \p e has reachability
628  * which is a subset of the reachability of \p dom */
629 static
reversePathReachSubset(const NFAEdge & e,const NFAVertex & dom,const NGHolder & g)630 bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom,
631                             const NGHolder &g) {
632     const CharReach &domReach = g[dom].char_reach;
633     if (domReach.all()) {
634         return true;
635     }
636 
637     NFAVertex start = source(e, g);
638     using RevGraph = boost::reverse_graph<NGHolder, const NGHolder &>;
639     map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor;
640 
641     // Walk the graph backwards from v, examining each node. We fail (return
642     // false) if we encounter a node with reach NOT a subset of domReach, and
643     // we stop searching at dom.
644     try {
645         depth_first_visit(RevGraph(g), start,
646                           ReachSubsetVisitor(domReach),
647                           make_assoc_property_map(vertexColor),
648                           VertexIs<RevGraph, RevGraph::vertex_descriptor>(dom));
649     } catch(ReachMismatch&) {
650         return false;
651     }
652 
653     return true;
654 }
655 
656 /** Returns true if every vertex on paths leading from edge \p e has
657  * reachability which is a subset of the reachability of \p dom */
658 static
forwardPathReachSubset(const NFAEdge & e,const NFAVertex & dom,const NGHolder & g)659 bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom,
660                             const NGHolder &g) {
661     const CharReach &domReach = g[dom].char_reach;
662     if (domReach.all()) {
663         return true;
664     }
665 
666     NFAVertex start = target(e, g);
667     map<NFAVertex, boost::default_color_type> vertexColor;
668 
669     // Walk the graph forward from v, examining each node. We fail (return
670     // false) if we encounter a node with reach NOT a subset of domReach, and
671     // we stop searching at dom.
672     try {
673         depth_first_visit(g, start, ReachSubsetVisitor(domReach),
674                           make_assoc_property_map(vertexColor),
675                           VertexIs<NGHolder, NFAVertex>(dom));
676     } catch(ReachMismatch&) {
677         return false;
678     }
679 
680     return true;
681 }
682 
683 static
allOutsSpecial(NFAVertex v,const NGHolder & g)684 bool allOutsSpecial(NFAVertex v, const NGHolder &g) {
685     for (auto w : adjacent_vertices_range(v, g)) {
686         if (!is_special(w, g)) {
687             return false;
688         }
689     }
690     return true;
691 }
692 
693 static
allInsSpecial(NFAVertex v,const NGHolder & g)694 bool allInsSpecial(NFAVertex v, const NGHolder &g) {
695     for (auto u : inv_adjacent_vertices_range(v, g)) {
696         if (!is_special(u, g)) {
697             return false;
698         }
699     }
700     return true;
701 }
702 
703 /** Cheaply check whether this graph can't be reduced at all, because it is
704  * just a chain of vertices with no other edges. */
705 static
isIrreducible(const NGHolder & g)706 bool isIrreducible(const NGHolder &g) {
707     for (auto v : vertices_range(g)) {
708         // skip specials
709         if (is_special(v, g)) {
710             continue;
711         }
712 
713         if (in_degree(v, g) != 1 && !allInsSpecial(v, g)) {
714             return false;
715         }
716         if (out_degree(v, g) != 1 && !allOutsSpecial(v, g)) {
717             return false;
718         }
719     }
720 
721     /* if calcComponents got sleepy and went home, the above checks don't hold
722      * as it assumes there is only one connected component. */
723     if (isAlternationOfClasses(g)) {
724         return false;
725     }
726 
727     return true;
728 }
729 
730 static
findCyclic(const NGHolder & g,vector<bool> & cyclic)731 u32 findCyclic(const NGHolder &g, vector<bool> &cyclic) {
732     u32 count = 0;
733 
734     cyclic.resize(num_vertices(g));
735 
736     for (auto v : vertices_range(g)) {
737         assert(g[v].index < cyclic.size());
738         if (hasSelfLoop(v, g)) {
739             count++;
740             cyclic[g[v].index] = true;
741         }
742     }
743 
744     return count;
745 }
746 
747 static
findCyclicDom(NGHolder & g,vector<bool> & cyclic,set<NFAEdge> & dead,som_type som)748 void findCyclicDom(NGHolder &g, vector<bool> &cyclic,
749                    set<NFAEdge> &dead, som_type som) {
750     auto dominators = findDominators(g);
751 
752     for (auto v : vertices_range(g)) {
753         if (is_special(v, g)) {
754             continue;
755         }
756 
757         // Path in through a dominator (e.g. '.+a?foobar')
758         NFAVertex dom = dominators[v];
759         if (dom && cyclic[g[dom].index]
760             && edge(dom, v, g).second) {
761 
762             if (som && dom == g.startDs) {
763                 continue;
764             }
765 
766             DEBUG_PRINTF("vertex %zu is dominated by directly-connected cyclic "
767                          "vertex %zu\n", g[v].index, g[dom].index);
768 
769             // iff all paths through in-edge e of v involve vertices whose
770             // reachability is a subset of reach(dom), we can delete edge e.
771             for (const auto &e : in_edges_range(v, g)) {
772                 if (source(e, g) == dom) {
773                     continue;
774                 }
775 
776                 if (reversePathReachSubset(e, dom, g)) {
777                     DEBUG_PRINTF("edge (%zu, %zu) can be removed: leading "
778                                  "paths share dom reach\n",
779                                  g[source(e, g)].index, g[target(e, g)].index);
780                     dead.insert(e);
781                     if (source(e, g) == v) {
782                         cyclic[g[v].index] = false;
783                     }
784                     continue;
785                 }
786             }
787         }
788     }
789 }
790 
791 static
findCyclicPostDom(NGHolder & g,vector<bool> & cyclic,set<NFAEdge> & dead)792 void findCyclicPostDom(NGHolder &g, vector<bool> &cyclic,
793                        set<NFAEdge> &dead) {
794     auto postdominators = findPostDominators(g);
795 
796     for (auto v : vertices_range(g)) {
797         if (is_special(v, g)) {
798             continue;
799         }
800 
801         // Path out through a post-dominator (e.g. a?.+foobar')
802         NFAVertex postdom = postdominators[v];
803         if (postdom && cyclic[g[postdom].index] && edge(v, postdom, g).second) {
804             DEBUG_PRINTF("vertex %zu is postdominated by directly-connected "
805                          "cyclic vertex %zu\n", g[v].index, g[postdom].index);
806 
807             // iff all paths through in-edge e of v involve vertices whose
808             // reachability is a subset of reach(dom), we can delete edge e.
809             for (const auto &e : out_edges_range(v, g)) {
810                 if (target(e, g) == postdom) {
811                     continue;
812                 }
813 
814                 if (forwardPathReachSubset(e, postdom, g)) {
815                     DEBUG_PRINTF("edge (%zu, %zu) can be removed: trailing "
816                                  "paths share postdom reach\n",
817                                  g[source(e, g)].index, g[target(e, g)].index);
818                     if (target(e, g) == v) {
819                         cyclic[g[v].index] = false;
820                     }
821                     dead.insert(e);
822                     continue;
823                 }
824             }
825         }
826     }
827 }
828 
removeRedundancy(NGHolder & g,som_type som)829 bool removeRedundancy(NGHolder &g, som_type som) {
830     DEBUG_PRINTF("rr som = %d\n", (int)som);
831     renumber_vertices(g);
832 
833     // Cheap check: if all the non-special vertices have in-degree one and
834     // out-degree one, there's no redundancy in this here graph and we can
835     // vamoose.
836     if (isIrreducible(g)) {
837         return false;
838     }
839 
840     VertexInfoMap infoMap(g);
841 
842     // Populate maps of successors and predecessors, and accept status
843     populateContainers(g, infoMap);
844 
845     /* Run multiple passes: terminate when a full pass doesn't remove
846      * any vertices */
847     bool doUseless = true;
848     bool doDiamond = true;
849     set<NFAVertex> removable;
850     while (doUseless || doDiamond) {
851         if (doUseless
852             && doUselessMergePass(g, som, infoMap, removable)) {
853             doDiamond = true;
854         }
855         doUseless = false;
856 
857         if (doDiamond
858             && doDiamondMergePass(g, som, infoMap, removable)) {
859             doUseless = true;
860         }
861         doDiamond = false;
862     }
863     DEBUG_PRINTF("found %zu removable vertices overall.\n", removable.size());
864     remove_vertices(removable, g);
865 
866     return !removable.empty();
867 }
868 
869 /** UE-524: remove edges into nodes that are dominated by cyclic nodes with
870  * reachability that is a superset of all paths feeding into that edge. */
removeCyclicDominated(NGHolder & g,som_type som)871 bool removeCyclicDominated(NGHolder &g, som_type som) {
872     set<NFAEdge> dead;
873     vector<bool> cyclic;
874     bool changed = false;
875 
876     findCyclic(g, cyclic);
877 
878     findCyclicDom(g, cyclic, dead, som);
879     if (!dead.empty()) {
880         remove_edges(dead, g);
881         pruneUseless(g);
882         dead.clear();
883         cyclic.clear();  // need to recalculate cyclic as ids have changed
884         findCyclic(g, cyclic);
885         changed = true;
886     }
887 
888     findCyclicPostDom(g, cyclic, dead);
889     if (!dead.empty()) {
890         remove_edges(dead, g);
891         pruneUseless(g);
892         dead.clear();
893         changed = true;
894     }
895 
896     return changed;
897 }
898 
899 } // namespace ue2
900