1 /*
2  * Copyright (c) 2015-2017, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /** \file
30  * \brief Reverse acceleration analysis.
31  */
32 #include "ng_revacc.h"
33 
34 #include "grey.h"
35 #include "ng_holder.h"
36 #include "ue2common.h"
37 #include "nfa/accel.h"
38 #include "nfa/nfa_internal.h"
39 #include "util/bitutils.h"
40 #include "util/charreach.h"
41 #include "util/graph_range.h"
42 
43 #include <set>
44 
45 using namespace std;
46 
47 namespace ue2 {
48 
49 static
isPseudoNoCaseChar(const CharReach & cr)50 bool isPseudoNoCaseChar(const CharReach &cr) {
51     return cr.count() == 2 && !(cr.find_first() & 32)
52         && cr.test(cr.find_first() | 32);
53 }
54 
55 static
lookForEodSchemes(const RevAccInfo & rev_info,const u32 minWidth,NFA * nfa)56 bool lookForEodSchemes(const RevAccInfo &rev_info, const u32 minWidth,
57                        NFA *nfa) {
58     DEBUG_PRINTF("pure eod triggered pattern\n");
59 
60     /* 2 char */
61     for (u8 nocase = 0; nocase < 2; nocase++) {
62         for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) {
63             const CharReach &cr = rev_info.acceptEodReach[i];
64             const CharReach &cr2 = rev_info.acceptEodReach[i - 1];
65 
66             if (!nocase && cr.count() == 1 && cr2.count() == 1) {
67                 assert(i < minWidth);
68                 if (i >= minWidth) {
69                     goto single;
70                 }
71                 nfa->rAccelType = ACCEL_RDEOD;
72                 nfa->rAccelData.array[0] = (u8)cr.find_first();
73                 nfa->rAccelData.array[1] = (u8)cr2.find_first();
74                 nfa->rAccelOffset = i + 1;
75                 DEBUG_PRINTF("raccel eod x2 %u %04hx\n",
76                              nfa->rAccelOffset, nfa->rAccelData.dc);
77                 return true;
78             } else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr))
79                        && (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) {
80                 assert(i < minWidth);
81                 if (i >= minWidth) {
82                     goto single;
83                 }
84                 nfa->rAccelType = ACCEL_RDEOD_NOCASE;
85                 nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR;  /* uppercase */
86                 nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR;
87                 nfa->rAccelOffset = i + 1;
88                 DEBUG_PRINTF("raccel nc eod x2 %u %04hx\n",
89                              nfa->rAccelOffset, nfa->rAccelData.dc);
90                 return true;
91             }
92         }
93     }
94 
95  single:
96     /* 1 char */
97     for (u8 nocase = 0; nocase < 2; nocase++) {
98         for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
99             const CharReach &cr = rev_info.acceptEodReach[i];
100             if (!nocase && cr.count() == 1) {
101                 assert(i < minWidth);
102                 if (i >= minWidth) {
103                     return false;
104                 }
105                 nfa->rAccelType = ACCEL_REOD;
106                 nfa->rAccelData.c = (u8) cr.find_first();
107                 nfa->rAccelOffset = i + 1;
108                 DEBUG_PRINTF("raccel eod %u %02hhx\n",
109                              nfa->rAccelOffset, nfa->rAccelData.c);
110                 return true;
111             } else if (nocase && isPseudoNoCaseChar(cr)) {
112                 assert(i < minWidth);
113                 if (i >= minWidth) {
114                     return false;
115                 }
116                 nfa->rAccelType = ACCEL_REOD_NOCASE;
117                 nfa->rAccelData.c = (u8)cr.find_first(); /* uppercase */
118                 nfa->rAccelOffset = i + 1;
119                 DEBUG_PRINTF("raccel nc eod %u %02hhx\n",
120                              nfa->rAccelOffset, nfa->rAccelData.c);
121                 return true;
122             }
123         }
124     }
125 
126     return false;
127 }
128 
129 static
lookForFloatingSchemes(const RevAccInfo & rev_info,const u32 minWidth,NFA * nfa)130 bool lookForFloatingSchemes(const RevAccInfo &rev_info,
131                             const u32 minWidth, NFA *nfa) {
132     /* 2 char */
133     for (u8 nocase = 0; nocase < 2; nocase++) {
134         for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) {
135             CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i];
136             CharReach cr2 = rev_info.acceptEodReach[i - 1]
137                             | rev_info.acceptReach[i - 1];
138             if (!nocase && cr.count() == 1 && cr2.count() == 1) {
139                 assert((u8)(i - 1) < minWidth);
140                 if (i > minWidth) {
141                     goto single;
142                 }
143                 nfa->rAccelType = ACCEL_RDVERM;
144                 nfa->rAccelData.array[0] = (u8)cr.find_first();
145                 nfa->rAccelData.array[1] = (u8)cr2.find_first();
146                 nfa->rAccelOffset = i;
147                 DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx\n",
148                              nfa->rAccelOffset, nfa->rAccelData.array[0],
149                              nfa->rAccelData.array[1]);
150                 return true;
151             } else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr))
152                         && (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) {
153                 assert((u8)(i - 1) < minWidth);
154                 if (i > minWidth) {
155                     goto single;
156                 }
157                 nfa->rAccelType = ACCEL_RDVERM_NOCASE;
158                 nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR;
159                 nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR;
160                 nfa->rAccelOffset = i;
161                 DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx nc\n",
162                              nfa->rAccelOffset, nfa->rAccelData.array[0],
163                              nfa->rAccelData.array[1]);
164                 return true;
165             }
166         }
167     }
168 
169  single:
170     /* 1 char */
171     for (u8 nocase = 0; nocase < 2; nocase++) {
172         for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
173             CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i];
174             if (!nocase && cr.count() == 1) {
175                 assert(i < minWidth);
176                 if (i >= minWidth) {
177                     return false;
178                 }
179                 nfa->rAccelType = ACCEL_RVERM;
180                 nfa->rAccelData.c = (u8)cr.find_first();
181                 nfa->rAccelOffset = i + 1;
182                 DEBUG_PRINTF("raccel verm %u %02hhx\n", nfa->rAccelOffset,
183                              nfa->rAccelData.c);
184                 return true;
185             } else if (nocase && isPseudoNoCaseChar(cr)) {
186                 assert(i < minWidth);
187                 if (i >= minWidth) {
188                     return false;
189                 }
190                 nfa->rAccelType = ACCEL_RVERM_NOCASE;
191                 nfa->rAccelData.c = (u8)cr.find_first(); /* 'uppercase' char */
192                 nfa->rAccelOffset = i + 1;
193                 DEBUG_PRINTF("raccel nc verm %u %02hhx\n", nfa->rAccelOffset,
194                              nfa->rAccelData.c);
195                 return true;
196             }
197         }
198     }
199 
200     return false;
201 }
202 
buildReverseAcceleration(NFA * nfa,const RevAccInfo & rev_info,u32 min_width,bool eod_only)203 void buildReverseAcceleration(NFA *nfa, const RevAccInfo &rev_info,
204                               u32 min_width, bool eod_only) {
205     assert(nfa);
206 
207     if (!rev_info.valid) {
208         return;
209     }
210 
211     nfa->rAccelOffset = 1;
212 
213     assert(rev_info.acceptReach[0].any() || rev_info.acceptEodReach[0].any());
214     if (rev_info.acceptReach[0].none() && rev_info.acceptEodReach[0].none()) {
215         DEBUG_PRINTF("expected path to accept\n");
216         return;
217     }
218 
219     if (rev_info.acceptReach[0].none()) {
220         /* eod only */
221 
222         if (lookForEodSchemes(rev_info, min_width, nfa)) {
223             assert(nfa->rAccelOffset <= min_width);
224             return;
225         }
226     }
227 
228     if (eod_only) {
229         return;
230     }
231 
232     if (!lookForFloatingSchemes(rev_info, min_width, nfa)) {
233         DEBUG_PRINTF("failed to accelerate\n");
234     }
235 }
236 
237 static
populateRevAccelInfo(const NGHolder & g,NFAVertex terminal,vector<CharReach> * reach)238 void populateRevAccelInfo(const NGHolder &g, NFAVertex terminal,
239                           vector<CharReach> *reach) {
240     set<NFAVertex> vset;
241 
242     for (auto v : inv_adjacent_vertices_range(terminal, g)) {
243         if (!is_special(v, g)) {
244             vset.insert(v);
245         }
246     }
247 
248     for (u8 offset = 0; offset < MAX_RACCEL_OFFSET; offset++) {
249         set<NFAVertex> next;
250 
251         for (auto v : vset) {
252             const CharReach &cr = g[v].char_reach;
253             (*reach)[offset] |= cr;
254 
255             DEBUG_PRINTF("off %u adding %zu to %zu\n", offset, cr.count(),
256                          (*reach)[offset].count());
257 
258             for (auto u : inv_adjacent_vertices_range(v, g)) {
259                 if (u == g.start || u == g.startDs) {
260                     /* kill all subsequent offsets by setting to dot, setting
261                      * to dot is in someways not accurate as there may be no
262                      * data at all but neither case can be accelerated */
263                     for (u8 i = offset + 1; i < MAX_RACCEL_OFFSET; i++) {
264                         (*reach)[i].setall();
265                     }
266                     break;
267                 } else if (!is_special(u, g)) {
268                     next.insert(u);
269                 }
270             }
271         }
272 
273         swap(vset, next);
274     }
275 }
276 
populateReverseAccelerationInfo(RevAccInfo & rai,const NGHolder & g)277 void populateReverseAccelerationInfo(RevAccInfo &rai, const NGHolder &g) {
278     DEBUG_PRINTF("pop rev info\n");
279     populateRevAccelInfo(g, g.accept, &rai.acceptReach);
280     populateRevAccelInfo(g, g.acceptEod, &rai.acceptEodReach);
281     rai.valid = true;
282 }
283 
mergeReverseAccelerationInfo(RevAccInfo & dest,const RevAccInfo & vic)284 void mergeReverseAccelerationInfo(RevAccInfo &dest, const RevAccInfo &vic) {
285     DEBUG_PRINTF("merging ra\n");
286 
287     dest.valid &= vic.valid;
288 
289     for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
290         dest.acceptReach[i]    |= vic.acceptReach[i];
291         dest.acceptEodReach[i] |= vic.acceptEodReach[i];
292     }
293 }
294 
RevAccInfo(void)295 RevAccInfo::RevAccInfo(void)
296     : valid(false), acceptReach(MAX_RACCEL_OFFSET),
297       acceptEodReach(MAX_RACCEL_OFFSET) {}
298 
299 } // namespace ue2
300