1 /*
2  * Copyright (c) 2015-2017, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /** \file
30  * \brief Region Redundancy optimisation pass.
31  *
32  * Identifies and removes entire regions that are adjacent to a cyclic state
33  * with a superset of their character reachability.
34  */
35 #include "ng_region_redundancy.h"
36 
37 #include "ng_holder.h"
38 #include "ng_region.h"
39 #include "ng_util.h"
40 #include "ue2common.h"
41 #include "util/container.h"
42 #include "util/graph_range.h"
43 
44 #include <set>
45 
46 using namespace std;
47 
48 namespace ue2 {
49 
50 namespace {
51 
52 /** Precalculated information about a region. */
53 struct RegionInfo {
54     NFAVertex entry; //!< arbitrary entry vertex
55     CharReach cr;    //!< union of the reach of all vertices in region
56 };
57 
58 } // namespace
59 
60 static
regionHasUnexpectedAccept(const NGHolder & g,const u32 region,const flat_set<ReportID> & expected_reports,const unordered_map<NFAVertex,u32> & region_map)61 bool regionHasUnexpectedAccept(const NGHolder &g, const u32 region,
62                        const flat_set<ReportID> &expected_reports,
63                        const unordered_map<NFAVertex, u32> &region_map) {
64     /* TODO: only check vertices connected to accept/acceptEOD */
65     for (auto v : vertices_range(g)) {
66         if (region != region_map.at(v)) {
67             continue;
68         }
69 
70         if (is_any_accept(v, g)) {
71             return true; /* encountering an actual special in the region is
72                           * possible but definitely unexpected */
73         }
74 
75         for (auto w : adjacent_vertices_range(v, g)) {
76             if (is_any_accept(w, g) && g[v].reports != expected_reports) {
77                 return true;
78             }
79         }
80     }
81     return false;
82 }
83 
84 static
processCyclicStateForward(NGHolder & h,NFAVertex cyc,const map<u32,RegionInfo> & info,const unordered_map<NFAVertex,u32> & region_map,set<u32> & deadRegions)85 void processCyclicStateForward(NGHolder &h, NFAVertex cyc,
86                          const map<u32, RegionInfo> &info,
87                          const unordered_map<NFAVertex, u32> &region_map,
88                          set<u32> &deadRegions) {
89     u32 region = region_map.at(cyc);
90     CharReach cr = h[cyc].char_reach;
91     auto reports = h[cyc].reports;
92 
93     DEBUG_PRINTF("going forward from %zu/%u\n", h[cyc].index,
94                  region);
95 
96     map<u32, RegionInfo>::const_iterator it;
97     while ((it = info.find(++region)) != info.end()) {
98         NFAVertex v = it->second.entry;
99         const CharReach &region_cr = it->second.cr;
100         assert(isRegionEntry(h, v, region_map) && !is_special(v, h));
101         DEBUG_PRINTF("checking %zu\n", h[v].index);
102 
103         if (!region_cr.isSubsetOf(cr)) {
104             DEBUG_PRINTF("doesn't cover the reach of region %u\n", region);
105             break;
106         }
107 
108         if (isOptionalRegion(h, v, region_map)
109             && !regionHasUnexpectedAccept(h, region, reports, region_map)) {
110             DEBUG_PRINTF("cyclic state %zu leads to optional region leader"
111                          " %zu\n", h[cyc].index, h[v].index);
112             deadRegions.insert(region);
113         } else if (isSingletonRegion(h, v, region_map)) {
114             /* we can use this region as straw and suck in optional regions on
115              * the other side. This allows us to transform /a{n,m}/ to /a{n}/ */
116             cr = h[v].char_reach;
117             reports = h[v].reports;
118             DEBUG_PRINTF("%u is straw\n", region);
119             assert(cr.isSubsetOf(h[cyc].char_reach));
120             if (hasSelfLoop(v, h)) {
121                 DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region);
122                 remove_edge(v, v, h);
123             }
124         } else {
125             break;
126         }
127     }
128 }
129 
130 static
processCyclicStateReverse(NGHolder & h,NFAVertex cyc,const map<u32,RegionInfo> & info,const unordered_map<NFAVertex,u32> & region_map,set<u32> & deadRegions)131 void processCyclicStateReverse(NGHolder &h, NFAVertex cyc,
132                          const map<u32, RegionInfo> &info,
133                          const unordered_map<NFAVertex, u32> &region_map,
134                          set<u32> &deadRegions) {
135     u32 region = region_map.at(cyc);
136     CharReach cr = h[cyc].char_reach;
137     auto reports = h[cyc].reports;
138 
139     DEBUG_PRINTF("going back from %zu/%u\n", h[cyc].index, region);
140 
141     map<u32, RegionInfo>::const_iterator it;
142     while ((it = info.find(--region)) != info.end()) {
143         NFAVertex v = it->second.entry;
144         const CharReach &region_cr = it->second.cr;
145         assert(isRegionEntry(h, v, region_map) && !is_special(v, h));
146         DEBUG_PRINTF("checking %zu\n", h[v].index);
147 
148         if (!region_cr.isSubsetOf(cr)) {
149             DEBUG_PRINTF("doesn't cover the reach of region %u\n", region);
150             break;
151         }
152 
153         if (isOptionalRegion(h, v, region_map)
154             && !regionHasUnexpectedAccept(h, region, reports, region_map)) {
155             DEBUG_PRINTF("cyclic state %zu trails optional region leader %zu\n",
156                          h[cyc].index, h[v].index);
157             deadRegions.insert(region);
158         } else if (isSingletonRegion(h, v, region_map)) {
159             /* we can use this region as a reverse straw and suck in optional
160              * regions on the other side. This allows us to transform
161              * /^a?a{n}.*b/ to /^a{n}.*b/ */
162             cr = h[v].char_reach;
163             reports = h[v].reports;
164             DEBUG_PRINTF("%u is straw\n", region);
165             assert(cr.isSubsetOf(h[cyc].char_reach));
166             if (hasSelfLoop(v, h)) {
167                 DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region);
168                 remove_edge(v, v, h);
169             }
170         } else {
171             break;
172         }
173 
174         if (!region) { // No wrapping
175             break;
176         }
177     }
178 }
179 
180 static
buildRegionInfoMap(const NGHolder & g,const unordered_map<NFAVertex,u32> & region_map)181 map<u32, RegionInfo> buildRegionInfoMap(const NGHolder &g,
182                    const unordered_map<NFAVertex, u32> &region_map) {
183     map<u32, RegionInfo> info;
184 
185     for (auto v : vertices_range(g)) {
186         u32 region = region_map.at(v);
187         if (is_special(v, g) || region == 0) {
188             continue;
189         }
190 
191         RegionInfo &ri = info[region];
192         ri.cr |= g[v].char_reach;
193         if (isRegionEntry(g, v, region_map)) {
194             ri.entry = v;
195         }
196     }
197 
198     return info;
199 }
200 
201 static
hasNoStartAnchoring(const NGHolder & h)202 bool hasNoStartAnchoring(const NGHolder &h) {
203     for (auto v : adjacent_vertices_range(h.start, h)) {
204         if (!edge(h.startDs, v, h).second) {
205             return false;
206         }
207     }
208     return true;
209 }
210 
removeRegionRedundancy(NGHolder & g,som_type som)211 void removeRegionRedundancy(NGHolder &g, som_type som) {
212     auto region_map = assignRegions(g);
213 
214     map<u32, RegionInfo> info = buildRegionInfoMap(g, region_map);
215 
216     set<u32> deadRegions;
217 
218     /* if we are not tracking som, we can treat sds as a cyclic region if there
219      * is no anchoring */
220     if (!som && hasNoStartAnchoring(g)) {
221         processCyclicStateForward(g, g.startDs, info, region_map, deadRegions);
222     }
223 
224     // Walk the region mapping, looking for regions that consist of a single
225     // cyclic node.
226 
227     for (const auto &m : info) {
228         // Must not have already been removed
229         if (contains(deadRegions, m.first)) {
230             continue;
231         }
232 
233         NFAVertex v = m.second.entry;
234         /* require a singleton cyclic region */
235         if (!hasSelfLoop(v, g) || !isSingletonRegion(g, v, region_map)) {
236             continue;
237         }
238 
239         if (som && is_virtual_start(v, g)) {
240             continue;
241         }
242 
243         processCyclicStateForward(g, v, info, region_map, deadRegions);
244         processCyclicStateReverse(g, v, info, region_map, deadRegions);
245     }
246 
247     if (deadRegions.empty()) {
248         return;
249     }
250 
251     vector<NFAVertex> dead;
252 
253     for (auto v : vertices_range(g)) {
254         if (is_special(v, g)) {
255             continue;
256         }
257         u32 region = region_map.at(v);
258         if (contains(deadRegions, region)) {
259             dead.push_back(v);
260         }
261     }
262 
263     if (!dead.empty()) {
264         DEBUG_PRINTF("removing %zu vertices from %zu dead regions\n",
265                      dead.size(), deadRegions.size());
266         remove_vertices(dead, g);
267     }
268 }
269 
270 } // namespace ue2
271