1 /*
2  * Copyright (c) 2015-2017, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /** \file
30  * \brief Analysis for literals decorated by leading/trailing assertions or
31  * character classes.
32  */
33 #include "ng_literal_decorated.h"
34 
35 #include "nfagraph/ng_holder.h"
36 #include "nfagraph/ng_util.h"
37 #include "rose/rose_build.h"
38 #include "rose/rose_in_graph.h"
39 #include "rose/rose_in_util.h"
40 #include "util/compile_context.h"
41 #include "util/dump_charclass.h"
42 #include "util/make_unique.h"
43 
44 #include <algorithm>
45 #include <memory>
46 #include <sstream>
47 
48 using namespace std;
49 
50 namespace ue2 {
51 
52 namespace {
53 
54 /** \brief Max fixed-width paths to generate from a graph. */
55 static constexpr size_t MAX_PATHS = 10;
56 
57 /** \brief Max degree for any non-special vertex in the graph. */
58 static constexpr size_t MAX_VERTEX_DEGREE = 6;
59 
60 using Path = vector<NFAVertex>;
61 
62 } // namespace
63 
64 static
findPaths(const NGHolder & g,vector<Path> & paths)65 bool findPaths(const NGHolder &g, vector<Path> &paths) {
66     vector<NFAVertex> order = getTopoOrdering(g);
67 
68     vector<size_t> read_count(num_vertices(g));
69     vector<vector<Path>> built(num_vertices(g));
70 
71     for (auto it = order.rbegin(); it != order.rend(); ++it) {
72         NFAVertex v = *it;
73         auto &out = built[g[v].index];
74         assert(out.empty());
75 
76         read_count[g[v].index] = out_degree(v, g);
77 
78         DEBUG_PRINTF("setting read_count to %zu for %zu\n",
79                       read_count[g[v].index], g[v].index);
80 
81         if (v == g.start || v == g.startDs) {
82             out.push_back({v});
83             continue;
84         }
85 
86         // The paths to v are the paths to v's predecessors, with v added to
87         // the end of each.
88         for (auto u : inv_adjacent_vertices_range(v, g)) {
89             // We have a stylized connection from start -> startDs, but we
90             // don't need anchored and unanchored versions of the same path.
91             if (u == g.start && edge(g.startDs, v, g).second) {
92                 continue;
93             }
94 
95             // Similarly, avoid the accept->acceptEod edge.
96             if (u == g.accept) {
97                 assert(v == g.acceptEod);
98                 continue;
99             }
100 
101             assert(!built[g[u].index].empty());
102             assert(read_count[g[u].index]);
103 
104             for (const auto &p : built[g[u].index]) {
105                 out.push_back(p);
106                 out.back().push_back(v);
107 
108                 if (out.size() > MAX_PATHS) {
109                     // All these paths should eventually end up at a sink, so
110                     // we've blown past our limit.
111                     DEBUG_PRINTF("path limit exceeded\n");
112                     return false;
113                 }
114             }
115 
116             read_count[g[u].index]--;
117             if (!read_count[g[u].index]) {
118                 DEBUG_PRINTF("clearing %zu as finished reading\n", g[u].index);
119                 built[g[u].index].clear();
120                 built[g[u].index].shrink_to_fit();
121             }
122         }
123     }
124 
125     insert(&paths, paths.end(), built[NODE_ACCEPT]);
126     insert(&paths, paths.end(), built[NODE_ACCEPT_EOD]);
127 
128     DEBUG_PRINTF("%zu paths generated\n", paths.size());
129 
130     return paths.size() <= MAX_PATHS;
131 }
132 
133 static
hasLargeDegreeVertex(const NGHolder & g)134 bool hasLargeDegreeVertex(const NGHolder &g) {
135     for (const auto &v : vertices_range(g)) {
136         if (is_special(v, g)) { // specials can have large degree
137             continue;
138         }
139         if (degree(v, g) > MAX_VERTEX_DEGREE) {
140             DEBUG_PRINTF("vertex %zu has degree %zu\n", g[v].index,
141                          degree(v, g));
142             return true;
143         }
144     }
145     return false;
146 }
147 
148 #if defined(DEBUG) || defined(DUMP_SUPPORT)
149 static UNUSED
dumpPath(const NGHolder & g,const Path & path)150 string dumpPath(const NGHolder &g, const Path &path) {
151     ostringstream oss;
152     for (const auto &v : path) {
153         switch (g[v].index) {
154         case NODE_START:
155             oss << "<start>";
156             break;
157         case NODE_START_DOTSTAR:
158             oss << "<startDs>";
159             break;
160         case NODE_ACCEPT:
161             oss << "<accept>";
162             break;
163         case NODE_ACCEPT_EOD:
164             oss << "<acceptEod>";
165             break;
166         default:
167             oss << describeClass(g[v].char_reach);
168             break;
169         }
170     }
171     return oss.str();
172 }
173 #endif
174 
175 struct PathMask {
PathMaskue2::PathMask176     PathMask(const NGHolder &g, const Path &path)
177         : is_anchored(path.front() == g.start),
178           is_eod(path.back() == g.acceptEod) {
179         assert(path.size() >= 2);
180         mask.reserve(path.size() - 2);
181         for (const auto &v : path) {
182             if (is_special(v, g)) {
183                 continue;
184             }
185             mask.push_back(g[v].char_reach);
186         }
187 
188         // Reports are attached to the second-to-last vertex.
189         NFAVertex u = *std::next(path.rbegin());
190         reports = g[u].reports;
191         assert(!reports.empty());
192     }
193 
194     vector<CharReach> mask;
195     flat_set<ReportID> reports;
196     bool is_anchored;
197     bool is_eod;
198 };
199 
handleDecoratedLiterals(RoseBuild & rose,const NGHolder & g,const CompileContext & cc)200 bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g,
201                              const CompileContext &cc) {
202     if (!cc.grey.allowDecoratedLiteral) {
203         return false;
204     }
205 
206     if (!isAcyclic(g)) {
207         DEBUG_PRINTF("not acyclic\n");
208         return false;
209     }
210 
211     if (!hasNarrowReachVertex(g)) {
212         DEBUG_PRINTF("no narrow reach vertices\n");
213         return false;
214     }
215 
216     if (hasLargeDegreeVertex(g)) {
217         DEBUG_PRINTF("large degree\n");
218         return false;
219     }
220 
221     vector<Path> paths;
222     if (!findPaths(g, paths)) {
223         DEBUG_PRINTF("couldn't split into a small number of paths\n");
224         return false;
225     }
226 
227     assert(!paths.empty());
228     assert(paths.size() <= MAX_PATHS);
229 
230     vector<PathMask> masks;
231     masks.reserve(paths.size());
232 
233     for (const auto &path : paths) {
234         DEBUG_PRINTF("path: %s\n", dumpPath(g, path).c_str());
235         PathMask pm(g, path);
236         if (!rose.validateMask(pm.mask, pm.reports, pm.is_anchored,
237                                pm.is_eod)) {
238             DEBUG_PRINTF("failed validation\n");
239             return false;
240         }
241         masks.push_back(move(pm));
242     }
243 
244     for (const auto &pm : masks) {
245         rose.addMask(pm.mask, pm.reports, pm.is_anchored, pm.is_eod);
246     }
247 
248     DEBUG_PRINTF("all ok, %zu masks added\n", masks.size());
249     return true;
250 }
251 
252 } // namespace ue2
253