1 /*
2  * Copyright (c) 2015-2017, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /** \file
30  * \brief Miscellaneous NFA graph utilities.
31  */
32 #ifndef NG_UTIL_H
33 #define NG_UTIL_H
34 
35 #include "ng_depth.h"
36 #include "ng_holder.h"
37 #include "ue2common.h"
38 #include "util/flat_containers.h"
39 #include "util/graph.h"
40 #include "util/graph_range.h"
41 
42 #include <boost/graph/depth_first_search.hpp> // for default_dfs_visitor
43 
44 #include <algorithm>
45 #include <map>
46 #include <unordered_map>
47 #include <vector>
48 
49 namespace ue2 {
50 
51 struct Grey;
52 struct ue2_literal;
53 class ReportManager;
54 
55 template<class VertexDepth>
maxDistFromInit(const VertexDepth & vd)56 depth maxDistFromInit(const VertexDepth &vd) {
57     if (vd.fromStart.max.is_unreachable()) {
58         return vd.fromStartDotStar.max;
59     } else if (vd.fromStartDotStar.max.is_unreachable()) {
60         return vd.fromStart.max;
61     } else {
62         return std::max(vd.fromStartDotStar.max, vd.fromStart.max);
63     }
64 }
65 
66 template<class VertexDepth>
maxDistFromStartOfData(const VertexDepth & vd)67 depth maxDistFromStartOfData(const VertexDepth &vd) {
68     if (vd.fromStartDotStar.max.is_reachable()) {
69         /* the irrepressible nature of floating literals cannot be contained */
70         return depth::infinity();
71     } else {
72         return vd.fromStart.max;
73     }
74 }
75 
76 /** True if the given vertex is a dot (reachable on any character). */
77 template<class GraphT>
78 static really_inline
is_dot(NFAVertex v,const GraphT & g)79 bool is_dot(NFAVertex v, const GraphT &g) {
80     return g[v].char_reach.all();
81 }
82 
83 /** adds successors of v to s */
84 template<class U>
85 static really_inline
succ(const NGHolder & g,NFAVertex v,U * s)86 void succ(const NGHolder &g, NFAVertex v, U *s) {
87     auto rv = adjacent_vertices(v, g);
88     s->insert(rv.first, rv.second);
89 }
90 
91 template<class ContTemp = flat_set<NFAVertex>>
succs(NFAVertex u,const NGHolder & g)92 ContTemp succs(NFAVertex u, const NGHolder &g) {
93     ContTemp rv;
94     succ(g, u, &rv);
95     return rv;
96 }
97 
98 /** adds predecessors of v to s */
99 template<class U>
100 static really_inline
pred(const NGHolder & g,NFAVertex v,U * p)101 void pred(const NGHolder &g, NFAVertex v, U *p) {
102     auto rv = inv_adjacent_vertices(v, g);
103     p->insert(rv.first, rv.second);
104 }
105 
106 template<class ContTemp = flat_set<NFAVertex>>
preds(NFAVertex u,const NGHolder & g)107 ContTemp preds(NFAVertex u, const NGHolder &g) {
108     ContTemp rv;
109     pred(g, u, &rv);
110     return rv;
111 }
112 
113 /** returns a vertex with an out edge from v and is not v.
114  * v must have exactly one out-edge excluding self-loops.
115  * will return NGHolder::null_vertex() if the preconditions don't hold.
116  */
117 NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex v);
118 
119 /** Like getSoleDestVertex but for in-edges */
120 NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex v);
121 
122 /** \brief edge filtered graph.
123  *
124  * This will give you a view over the graph that has none of the edges from
125  * the provided set included.
126  *
127  * If this is provided with the back edges of the graph, this will result in an
128  * acyclic subgraph view. This is useful for topological_sort and other
129  * algorithms that require a DAG.
130  */
131 template<typename EdgeSet>
132 struct bad_edge_filter {
bad_edge_filterbad_edge_filter133     bad_edge_filter() {}
bad_edge_filterbad_edge_filter134     explicit bad_edge_filter(const EdgeSet *bad_e) : bad_edges(bad_e) {}
operatorbad_edge_filter135     bool operator()(const typename EdgeSet::value_type &e) const {
136         return !contains(*bad_edges, e); /* keep edges not in the bad set */
137     }
138     const EdgeSet *bad_edges = nullptr;
139 };
140 
141 template<typename EdgeSet>
make_bad_edge_filter(const EdgeSet * e)142 bad_edge_filter<EdgeSet> make_bad_edge_filter(const EdgeSet *e) {
143     return bad_edge_filter<EdgeSet>(e);
144 }
145 
146 /** \brief vertex graph filter. */
147 template<typename VertexSet>
148 struct bad_vertex_filter {
149     bad_vertex_filter() = default;
bad_vertex_filterbad_vertex_filter150     explicit bad_vertex_filter(const VertexSet *bad_v) : bad_vertices(bad_v) {}
operatorbad_vertex_filter151     bool operator()(const typename VertexSet::value_type &v) const {
152         return !contains(*bad_vertices, v); /* keep vertices not in bad set */
153     }
154     const VertexSet *bad_vertices = nullptr;
155 };
156 
157 template<typename VertexSet>
make_bad_vertex_filter(const VertexSet * v)158 bad_vertex_filter<VertexSet> make_bad_vertex_filter(const VertexSet *v) {
159     return bad_vertex_filter<VertexSet>(v);
160 }
161 
162 /** Visitor that records back edges */
163 template <typename BackEdgeSet>
164 class BackEdges : public boost::default_dfs_visitor {
165 public:
BackEdges(BackEdgeSet & edges)166     explicit BackEdges(BackEdgeSet &edges) : backEdges(edges) {}
167     template <class EdgeT, class GraphT>
back_edge(const EdgeT & e,const GraphT &)168     void back_edge(const EdgeT &e, const GraphT &) {
169         backEdges.insert(e); // Remove this back edge only
170     }
171     BackEdgeSet &backEdges;
172 };
173 
174 /** Returns true if the vertex is either of the real starts (NODE_START,
175  *  NODE_START_DOTSTAR). */
176 template <typename GraphT>
177 static really_inline
is_any_start(typename GraphT::vertex_descriptor v,const GraphT & g)178 bool is_any_start(typename GraphT::vertex_descriptor v, const GraphT &g) {
179     u32 i = g[v].index;
180     return i == NODE_START || i == NODE_START_DOTSTAR;
181 }
182 
183 bool is_virtual_start(NFAVertex v, const NGHolder &g);
184 
185 template <typename GraphT>
is_any_accept(typename GraphT::vertex_descriptor v,const GraphT & g)186 bool is_any_accept(typename GraphT::vertex_descriptor v, const GraphT &g) {
187     u32 i = g[v].index;
188     return i == NODE_ACCEPT || i == NODE_ACCEPT_EOD;
189 }
190 
191 /** returns true iff v has an edge to accept or acceptEod */
192 template <typename GraphT>
is_match_vertex(typename GraphT::vertex_descriptor v,const GraphT & g)193 bool is_match_vertex(typename GraphT::vertex_descriptor v, const GraphT &g) {
194     return edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second;
195 }
196 
197 /** Generate a reverse topological ordering for a back-edge filtered version of
198  * our graph (as it must be a DAG and correctly numbered).
199  *
200  * Note: we ensure that we produce a topo ordering that begins with acceptEod
201  * and accept (if present) and ends with startDs followed by start.
202  */
203 std::vector<NFAVertex> getTopoOrdering(const NGHolder &g);
204 
205 bool onlyOneTop(const NGHolder &g);
206 
207 /** Return the set of the tops on the given graph. */
208 flat_set<u32> getTops(const NGHolder &h);
209 
210 /** Initialise the tops on h to the provide top. Assumes that h is triggered and
211  * no tops have been set on h. */
212 void setTops(NGHolder &h, u32 top = DEFAULT_TOP);
213 
214 /** adds a vertex to g with all the same vertex properties as \p v (aside from
215  * index) */
216 NFAVertex clone_vertex(NGHolder &g, NFAVertex v);
217 
218 /**
219  * \brief Copies all out-edges from source to target.
220  *
221  * Edge properties (aside from index) are preserved and duplicate edges are
222  * skipped.
223  */
224 void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest);
225 
226 /**
227  * \brief Copies all in-edges from source to target.
228  *
229  * Edge properties (aside from index) are preserved.
230  */
231 void clone_in_edges(NGHolder &g, NFAVertex source, NFAVertex dest);
232 
233 /** \brief True if the graph contains an edge from one of {start, startDs} to
234  * one of {accept, acceptEod}. */
235 bool isVacuous(const NGHolder &h);
236 
237 /** \brief True if the graph contains no floating vertices (startDs has no
238  * proper successors). */
239 bool isAnchored(const NGHolder &h);
240 
241 /** \brief True if the graph contains no anchored vertices (start has no
242  * successors aside from startDs or vertices connected to startDs). */
243 bool isFloating(const NGHolder &h);
244 
245 /** True if the graph contains no back-edges at all, other than the
246  * startDs self-loop. */
247 bool isAcyclic(const NGHolder &g);
248 
249 /** True if the graph has a cycle reachable from the given source vertex. */
250 bool hasReachableCycle(const NGHolder &g, NFAVertex src);
251 
252 /** True if g has any cycles which are not self-loops. */
253 bool hasBigCycles(const NGHolder &g);
254 
255 /**
256  * \brief True if g has at least one non-special vertex with reach smaller than
257  * max_reach_count. The default of 200 is pretty conservative.
258  */
259 bool hasNarrowReachVertex(const NGHolder &g, size_t max_reach_count = 200);
260 
261 /** Returns the set of all vertices that appear in any of the graph's cycles. */
262 std::set<NFAVertex> findVerticesInCycles(const NGHolder &g);
263 
264 bool can_never_match(const NGHolder &g);
265 
266 /* \brief Does the graph have any edges leading into acceptEod (aside from
267  * accept) or will it have after resolving asserts? */
268 bool can_match_at_eod(const NGHolder &h);
269 
270 bool can_only_match_at_eod(const NGHolder &g);
271 
272 /** \brief Does this graph become a "firehose", matching between every
273  * byte? */
274 bool matches_everywhere(const NGHolder &h);
275 
276 
277 struct mbsb_cache {
mbsb_cachembsb_cache278     explicit mbsb_cache(const NGHolder &gg) : g(gg) {}
279     std::map<std::pair<u32, u32>, bool> cache;
280     const NGHolder &g;
281 };
282 
283 /* weaker than straight domination as allows jump edges */
284 bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g,
285                      mbsb_cache &cache);
286 
287 /* adds the literal 's' to the end of the graph before h.accept */
288 void appendLiteral(NGHolder &h, const ue2_literal &s);
289 
290 /** \brief Fill graph \a outp with a subset of the vertices in \a in (given in
291  * \a in). A vertex mapping is returned in \a v_map_out. */
292 void fillHolder(NGHolder *outp, const NGHolder &in,
293                 const std::deque<NFAVertex> &vv,
294                 std::unordered_map<NFAVertex, NFAVertex> *v_map_out);
295 
296 /** \brief Clone the graph in \a in into graph \a out, returning a vertex
297  * mapping in \a v_map_out. */
298 void cloneHolder(NGHolder &out, const NGHolder &in,
299                  std::unordered_map<NFAVertex, NFAVertex> *v_map_out);
300 
301 /** \brief Clone the graph in \a in into graph \a out. */
302 void cloneHolder(NGHolder &out, const NGHolder &in);
303 
304 /** \brief Build a clone of graph \a in and return a pointer to it. */
305 std::unique_ptr<NGHolder> cloneHolder(const NGHolder &in);
306 
307 /** \brief Clear all reports on vertices that do not have an edge to accept or
308  * acceptEod. */
309 void clearReports(NGHolder &g);
310 
311 /** \brief Add report \a r_new to every vertex that already has report \a
312  * r_old. */
313 void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new);
314 
315 /** Construct a reversed copy of an arbitrary NGHolder, mapping starts to
316  * accepts. */
317 void reverseHolder(const NGHolder &g, NGHolder &out);
318 
319 /** \brief Returns the delay or ~0U if the graph cannot match with
320  * the trailing literal. */
321 u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
322                                 u32 max_delay, bool overhang_ok = true);
323 
324 #ifndef NDEBUG
325 
326 // Assertions: only available in internal builds.
327 
328 /**
329  * Used in sanity-checking assertions: returns true if all vertices
330  * with edges to accept or acceptEod have at least one report ID. Additionally,
331  * checks that ONLY vertices with edges to accept or acceptEod has reports.
332  */
333 bool allMatchStatesHaveReports(const NGHolder &g);
334 
335 /**
336  * Assertion: returns true if the graph is triggered and all edges out of start
337  * have tops OR if the graph is not-triggered and all edges out of start have no
338  * tops.
339  */
340 bool isCorrectlyTopped(const NGHolder &g);
341 #endif // NDEBUG
342 
343 } // namespace ue2
344 
345 #endif
346