1 /*
2  * Copyright (c) 2015-2017, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /** \file
30  * \brief Functions for pruning unreachable vertices or reports from the graph.
31  */
32 #include "ng_prune.h"
33 
34 #include "ng_dominators.h"
35 #include "ng_holder.h"
36 #include "ng_reports.h"
37 #include "ng_util.h"
38 #include "util/container.h"
39 #include "util/graph.h"
40 #include "util/graph_range.h"
41 #include "util/graph_small_color_map.h"
42 #include "util/report_manager.h"
43 
44 #include <deque>
45 #include <map>
46 
47 #include <boost/graph/depth_first_search.hpp>
48 #include <boost/graph/reverse_graph.hpp>
49 
50 using namespace std;
51 using boost::default_color_type;
52 using boost::reverse_graph;
53 
54 namespace ue2 {
55 
56 /** Remove any vertices that can't be reached by traversing the graph in
57  * reverse from acceptEod. */
pruneUnreachable(NGHolder & g)58 void pruneUnreachable(NGHolder &g) {
59     deque<NFAVertex> dead;
60 
61     if (in_degree(g.acceptEod, g) == 1 && !in_degree(g.accept, g)
62         && edge(g.accept, g.acceptEod, g).second) {
63         // Trivial case: there are no in-edges to our accepts (other than
64         // accept->acceptEod), so all non-specials are unreachable.
65         for (auto v : vertices_range(g)) {
66             if (!is_special(v, g)) {
67                 dead.push_back(v);
68             }
69         }
70     } else {
71         // Walk a reverse graph from acceptEod with Boost's depth_first_visit
72         // call.
73         typedef reverse_graph<NGHolder, NGHolder &> RevNFAGraph;
74         RevNFAGraph revg(g);
75 
76         map<RevNFAGraph::vertex_descriptor, default_color_type> colours;
77 
78         depth_first_visit(revg, g.acceptEod,
79                           make_dfs_visitor(boost::null_visitor()),
80                           make_assoc_property_map(colours));
81 
82         DEBUG_PRINTF("color map has %zu entries after DFV\n", colours.size());
83 
84         // All non-special vertices that aren't in the colour map (because they
85         // weren't reached) can be removed.
86         for (auto v : vertices_range(revg)) {
87             if (is_special(v, revg)) {
88                 continue;
89             }
90             if (!contains(colours, v)) {
91                 dead.push_back(v);
92             }
93         }
94     }
95 
96     if (dead.empty()) {
97         DEBUG_PRINTF("no unreachable vertices\n");
98         return;
99     }
100 
101     remove_vertices(dead, g, false);
102     DEBUG_PRINTF("removed %zu unreachable vertices\n", dead.size());
103 }
104 
105 template<class nfag_t>
106 static
pruneForwardUseless(NGHolder & h,const nfag_t & g,typename nfag_t::vertex_descriptor s,decltype(make_small_color_map (NGHolder ()))& colors)107 bool pruneForwardUseless(NGHolder &h, const nfag_t &g,
108                          typename nfag_t::vertex_descriptor s,
109                          decltype(make_small_color_map(NGHolder())) &colors) {
110     // Begin with all vertices set to white, as DFV only marks visited
111     // vertices.
112     colors.fill(small_color::white);
113 
114     depth_first_visit(g, s, make_dfs_visitor(boost::null_visitor()), colors);
115 
116     vector<NFAVertex> dead;
117 
118     // All non-special vertices that are still white can be removed.
119     for (auto v : vertices_range(g)) {
120         if (!is_special(v, g) && get(colors, v) == small_color::white) {
121             DEBUG_PRINTF("vertex %zu is unreachable from %zu\n",
122                          g[v].index, g[s].index);
123             dead.push_back(NFAVertex(v));
124         }
125     }
126 
127     if (dead.empty()) {
128         return false;
129     }
130 
131     DEBUG_PRINTF("removing %zu vertices\n", dead.size());
132     remove_vertices(dead, h, false);
133     return true;
134 }
135 
136 /** Remove any vertices which can't be reached by traversing the graph forward
137  * from start or in reverse from acceptEod. If \p renumber is false, no
138  * vertex/edge renumbering is done. */
pruneUseless(NGHolder & g,bool renumber)139 void pruneUseless(NGHolder &g, bool renumber) {
140     DEBUG_PRINTF("pruning useless vertices\n");
141     assert(hasCorrectlyNumberedVertices(g));
142     auto colors = make_small_color_map(g);
143 
144     bool work_done = pruneForwardUseless(g, g, g.start, colors);
145     work_done |= pruneForwardUseless(g, reverse_graph<NGHolder, NGHolder &>(g),
146                                      g.acceptEod, colors);
147 
148     if (!work_done) {
149         return;
150     }
151 
152     if (renumber) {
153         renumber_edges(g);
154         renumber_vertices(g);
155     }
156 }
157 
158 /** This code removes any vertices which do not accept any symbols. Any
159  * vertices which no longer lie on a path from a start to an accept are also
160  * pruned. */
pruneEmptyVertices(NGHolder & g)161 void pruneEmptyVertices(NGHolder &g) {
162     DEBUG_PRINTF("pruning empty vertices\n");
163     vector<NFAVertex> dead;
164     for (auto v : vertices_range(g)) {
165         if (is_special(v, g)) {
166             continue;
167         }
168 
169         const CharReach &cr = g[v].char_reach;
170         if (cr.none()) {
171             DEBUG_PRINTF("empty: %zu\n", g[v].index);
172             dead.push_back(v);
173         }
174     }
175 
176     if (dead.empty()) {
177         return;
178     }
179 
180     remove_vertices(dead, g);
181     pruneUseless(g);
182 }
183 
184 /** Remove any edges from vertices that generate accepts (for Highlander
185  * graphs). */
pruneHighlanderAccepts(NGHolder & g,const ReportManager & rm)186 void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) {
187     // Safety check: all reports must be simple exhaustible reports, or this is
188     // not safe. This optimisation should be called early enough that no
189     // internal reports have been added.
190     for (auto report_id : all_reports(g)) {
191         const Report &ir = rm.getReport(report_id);
192 
193         if (ir.ekey == INVALID_EKEY || ir.hasBounds() ||
194             !isExternalReport(ir)) {
195             DEBUG_PRINTF("report %u is not external highlander with "
196                          "no bounds\n", report_id);
197             return;
198         }
199     }
200 
201     vector<NFAEdge> dead;
202     for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
203         if (is_special(u, g)) {
204             continue;
205         }
206 
207         // We can prune any out-edges that aren't accepts
208         for (const auto &e : out_edges_range(u, g)) {
209             if (!is_any_accept(target(e, g), g)) {
210                 dead.push_back(e);
211             }
212         }
213     }
214 
215     if (dead.empty()) {
216         return;
217     }
218 
219     DEBUG_PRINTF("found %zu removable edges due to single match\n", dead.size());
220     remove_edges(dead, g);
221     pruneUseless(g);
222 }
223 
224 static
isDominatedByReporter(const NGHolder & g,const unordered_map<NFAVertex,NFAVertex> & dom,NFAVertex v,ReportID report_id)225 bool isDominatedByReporter(const NGHolder &g,
226                            const unordered_map<NFAVertex, NFAVertex> &dom,
227                            NFAVertex v, ReportID report_id) {
228     for (auto it = dom.find(v); it != end(dom); it = dom.find(v)) {
229         NFAVertex u = it->second;
230         // Note: reporters with edges only to acceptEod are not considered to
231         // dominate.
232         if (edge(u, g.accept, g).second && contains(g[u].reports, report_id)) {
233             DEBUG_PRINTF("%zu is dominated by %zu, and both report %u\n",
234                           g[v].index, g[u].index, report_id);
235             return true;
236         }
237         v = u;
238     }
239     return false;
240 }
241 
242 /**
243  * True if the vertex has (a) a self-loop, (b) only out-edges to accept and
244  * itself and (c) only simple exhaustible reports.
245  */
246 static
hasOnlySelfLoopAndExhaustibleAccepts(const NGHolder & g,const ReportManager & rm,NFAVertex v)247 bool hasOnlySelfLoopAndExhaustibleAccepts(const NGHolder &g,
248                                           const ReportManager &rm,
249                                           NFAVertex v) {
250     if (!edge(v, v, g).second) {
251         return false;
252     }
253 
254     for (auto w : adjacent_vertices_range(v, g)) {
255         if (w != v && w != g.accept) {
256             return false;
257         }
258     }
259 
260     for (const auto &report_id : g[v].reports) {
261         if (!isSimpleExhaustible(rm.getReport(report_id))) {
262             return false;
263         }
264     }
265 
266     return true;
267 }
268 
pruneHighlanderDominated(NGHolder & g,const ReportManager & rm)269 void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) {
270     vector<NFAVertex> reporters;
271     for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
272         for (const auto &report_id : g[v].reports) {
273             const Report &r = rm.getReport(report_id);
274             if (isSimpleExhaustible(r)) {
275                 reporters.push_back(v);
276                 break;
277             }
278         }
279     }
280     for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
281         for (const auto &report_id : g[v].reports) {
282             const Report &r = rm.getReport(report_id);
283             if (isSimpleExhaustible(r)) {
284                 reporters.push_back(v);
285                 break;
286             }
287         }
288     }
289 
290     if (reporters.empty()) {
291         return;
292     }
293 
294 
295     sort(begin(reporters), end(reporters));
296     reporters.erase(unique(begin(reporters), end(reporters)), end(reporters));
297 
298     DEBUG_PRINTF("%zu vertices have simple exhaustible reports\n",
299                  reporters.size());
300 
301     const auto &dom = findDominators(g);
302     bool modified = false;
303 
304     // If a reporter vertex is dominated by another with the same report, we
305     // can remove that report; if all reports are removed, we can remove the
306     // vertex entirely.
307     for (const auto v : reporters) {
308         const auto reports = g[v].reports; // copy, as we're going to mutate
309         for (const auto &report_id : reports) {
310             if (!isSimpleExhaustible(rm.getReport(report_id))) {
311                 continue;
312             }
313             if (isDominatedByReporter(g, dom, v, report_id)) {
314                 DEBUG_PRINTF("removed dominated report %u from vertex %zu\n",
315                              report_id, g[v].index);
316                 g[v].reports.erase(report_id);
317             }
318         }
319 
320         if (g[v].reports.empty()) {
321             DEBUG_PRINTF("removed edges to accepts from %zu, no reports left\n",
322                           g[v].index);
323             remove_edge(v, g.accept, g);
324             remove_edge(v, g.acceptEod, g);
325             modified = true;
326         }
327     }
328 
329     // If a reporter vertex has a self-loop, but otherwise only leads to accept
330     // (note: NOT acceptEod) and has simple exhaustible reports, we can delete
331     // the self-loop.
332     for (const auto v : reporters) {
333         if (hasOnlySelfLoopAndExhaustibleAccepts(g, rm, v)) {
334             remove_edge(v, v, g);
335             modified = true;
336             DEBUG_PRINTF("removed self-loop on %zu\n", g[v].index);
337         }
338     }
339 
340     if (!modified) {
341         return;
342     }
343 
344     pruneUseless(g);
345 
346     // We may have only removed self-loops, in which case pruneUseless wouldn't
347     // renumber, so we do edge renumbering explicitly here.
348     renumber_edges(g);
349 }
350 
351 /** Removes the given Report ID from vertices connected to accept, and then
352  * prunes useless vertices that have had their report sets reduced to empty. */
pruneReport(NGHolder & g,ReportID report)353 void pruneReport(NGHolder &g, ReportID report) {
354     set<NFAEdge> dead;
355 
356     for (const auto &e : in_edges_range(g.accept, g)) {
357         NFAVertex u = source(e, g);
358         auto &reports = g[u].reports;
359         if (contains(reports, report)) {
360             reports.erase(report);
361             if (reports.empty()) {
362                 dead.insert(e);
363             }
364         }
365     }
366 
367     for (const auto &e : in_edges_range(g.acceptEod, g)) {
368         NFAVertex u = source(e, g);
369         if (u == g.accept) {
370             continue;
371         }
372         auto &reports = g[u].reports;
373         if (contains(reports, report)) {
374             reports.erase(report);
375             if (reports.empty()) {
376                 dead.insert(e);
377             }
378         }
379     }
380 
381     if (dead.empty()) {
382         return;
383     }
384 
385     remove_edges(dead, g);
386     pruneUnreachable(g);
387     renumber_vertices(g);
388     renumber_edges(g);
389 }
390 
391 /** Removes all Report IDs bar the given one from vertices connected to accept,
392  * and then prunes useless vertices that have had their report sets reduced to
393  * empty. */
pruneAllOtherReports(NGHolder & g,ReportID report)394 void pruneAllOtherReports(NGHolder &g, ReportID report) {
395     set<NFAEdge> dead;
396 
397     for (const auto &e : in_edges_range(g.accept, g)) {
398         NFAVertex u = source(e, g);
399         auto &reports = g[u].reports;
400         if (contains(reports, report)) {
401             reports.clear();
402             reports.insert(report);
403         } else {
404             reports.clear();
405             dead.insert(e);
406         }
407     }
408 
409     for (const auto &e : in_edges_range(g.acceptEod, g)) {
410         NFAVertex u = source(e, g);
411         if (u == g.accept) {
412             continue;
413         }
414         auto &reports = g[u].reports;
415         if (contains(reports, report)) {
416             reports.clear();
417             reports.insert(report);
418         } else {
419             reports.clear();
420             dead.insert(e);
421         }
422     }
423 
424     if (dead.empty()) {
425         return;
426     }
427 
428     remove_edges(dead, g);
429     pruneUnreachable(g);
430     renumber_vertices(g);
431     renumber_edges(g);
432 }
433 
434 } // namespace ue2
435