1 /*
2  * Copyright (c) 2015-2017, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /** \file
30  * \brief NFA acceleration analysis code.
31  */
32 #include "ng_limex_accel.h"
33 
34 #include "ng_holder.h"
35 #include "ng_misc_opt.h"
36 #include "ng_util.h"
37 #include "ue2common.h"
38 
39 #include "nfa/accel.h"
40 
41 #include "util/bitutils.h" // for CASE_CLEAR
42 #include "util/charreach.h"
43 #include "util/compile_context.h"
44 #include "util/container.h"
45 #include "util/dump_charclass.h"
46 #include "util/graph_range.h"
47 #include "util/small_vector.h"
48 #include "util/target_info.h"
49 
50 #include <algorithm>
51 #include <map>
52 
53 #include <boost/range/adaptor/map.hpp>
54 
55 using namespace std;
56 using boost::adaptors::map_keys;
57 
58 namespace ue2 {
59 
60 #define WIDE_FRIEND_MIN 200
61 
62 static
findAccelFriendGeneration(const NGHolder & g,const CharReach & cr,const flat_set<NFAVertex> & cands,const flat_set<NFAVertex> & preds,flat_set<NFAVertex> * next_cands,flat_set<NFAVertex> * next_preds,flat_set<NFAVertex> * friends)63 void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
64                                const flat_set<NFAVertex> &cands,
65                                const flat_set<NFAVertex> &preds,
66                                flat_set<NFAVertex> *next_cands,
67                                flat_set<NFAVertex> *next_preds,
68                                flat_set<NFAVertex> *friends) {
69     for (auto v : cands) {
70         if (contains(preds, v)) {
71             continue;
72         }
73 
74         const CharReach &acr = g[v].char_reach;
75         DEBUG_PRINTF("checking %zu\n", g[v].index);
76 
77         if (acr.count() < WIDE_FRIEND_MIN || !acr.isSubsetOf(cr)) {
78             DEBUG_PRINTF("bad reach %zu\n", acr.count());
79             continue;
80         }
81 
82         for (auto u : inv_adjacent_vertices_range(v, g)) {
83             if (!contains(preds, u)) {
84                 DEBUG_PRINTF("bad pred\n");
85                 goto next_cand;
86             }
87         }
88 
89         next_preds->insert(v);
90         insert(next_cands, adjacent_vertices(v, g));
91 
92         DEBUG_PRINTF("%zu is a friend indeed\n", g[v].index);
93         friends->insert(v);
94     next_cand:;
95     }
96 }
97 
void findAccelFriends(const NGHolder &g, NFAVertex v,
                      const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
                      u32 offset, flat_set<NFAVertex> *friends) {
    /* A friend of an accel state is a successor state which can only be on when
     * the accel is on. This requires that it has a subset of the accel state's
     * preds and a charreach which is a subset of the accel state.
     *
     * A friend can be safely ignored when accelerating provided there is
     * sufficient back-off. A friend is useful if it has a wide reach.
     */

    /* BR cyclic states which may go stale cannot have friends as they may
     * suddenly turn off leading their so-called friends stranded and alone.
     * TODO: restrict to only stale going BR cyclics
     */
    if (contains(br_cyclic, v) && !br_cyclic.at(v).unbounded()) {
        return;
    }

    /* the deeper the accel back-off, the more generations of friends we may
     * safely accept */
    u32 friend_depth = offset + 1;

    /* 'preds' is the closure of states known to imply the accel state is on;
     * seeded with the accel state's own predecessors */
    flat_set<NFAVertex> preds;
    insert(&preds, inv_adjacent_vertices(v, g));
    const CharReach &cr = g[v].char_reach;

    /* first-generation candidates are the accel state's direct successors */
    flat_set<NFAVertex> cands;
    insert(&cands, adjacent_vertices(v, g));

    flat_set<NFAVertex> next_preds;
    flat_set<NFAVertex> next_cands;
    for (u32 i = 0; i < friend_depth; i++) {
        /* each generation may admit new friends whose preds all lie within
         * the closure built so far; accepted friends extend the closure */
        findAccelFriendGeneration(g, cr, cands, preds, &next_cands, &next_preds,
                                  friends);
        preds.insert(next_preds.begin(), next_preds.end());
        next_preds.clear();
        cands.swap(next_cands);
        next_cands.clear();
    }
}
137 
/* Collect sequences of per-vertex reach ("paths") of up to 'depth' vertices
 * leading away from the acceleration states, starting at v. Paths are built
 * child-first: each recursive return appends the current vertex's reach, so
 * callers must reverse the paths once collection is complete. Exploration is
 * truncated at self-loops, wide fan-out, or when a path loops back into
 * 'forbidden' (the accelerated states themselves). */
static
void findPaths(const NGHolder &g, NFAVertex v,
               const vector<CharReach> &refined_cr,
               vector<vector<CharReach>> *paths,
               const flat_set<NFAVertex> &forbidden, u32 depth) {
    /* fan-out/path-count limit used to contain combinatorial blowup */
    static const u32 MAGIC_TOO_WIDE_NUMBER = 16;
    if (!depth) {
        /* out of lookahead: terminate the path here */
        paths->push_back({});
        return;
    }
    if (v == g.accept || v == g.acceptEod) {
        paths->push_back({});
        if (!generates_callbacks(g) || v == g.acceptEod) {
            paths->back().push_back(CharReach()); /* red tape options */
        }
        return;
    }

    /* for the escape 'literals' we want to use the minimal cr so we
     * can be more selective */
    const CharReach &cr = refined_cr[g[v].index];

    if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER
        || hasSelfLoop(v, g)) {
        /* give up on pushing past this point */
        paths->push_back({cr});
        return;
    }

    vector<vector<CharReach>> curr;
    for (auto w : adjacent_vertices_range(v, g)) {
        if (contains(forbidden, w)) {
            /* path has looped back to one of the active+boring acceleration
             * states.  We can ignore this path if we have sufficient back-
             * off. */
            paths->push_back({cr});
            continue;
        }

        /* if recursion yields too many paths, retry with successively
         * shallower depths until the result is manageable */
        u32 new_depth = depth - 1;
        do {
            curr.clear();
            findPaths(g, w, refined_cr, &curr, forbidden, new_depth);
        } while (new_depth-- && curr.size() >= MAGIC_TOO_WIDE_NUMBER);

        for (auto &c : curr) {
            c.push_back(cr); /* child-first construction: our reach goes last */
            paths->push_back(std::move(c));
        }
    }
}
189 
namespace {
/* Candidate single-byte acceleration scheme: the union of reach that must be
 * scanned for ('cr'), applied at byte offset 'offset' from the accel state. */
struct SAccelScheme {
    SAccelScheme(CharReach cr_in, u32 offset_in)
        : cr(std::move(cr_in)), offset(offset_in) {
        assert(offset <= MAX_ACCEL_DEPTH);
    }

    SAccelScheme() {}

    /* Ordering: fewer characters in the reach is strictly better; ties are
     * broken by smaller offset, then by the reach itself (determinism only). */
    bool operator<(const SAccelScheme &b) const {
        const SAccelScheme &a = *this;

        const size_t a_count = cr.count(), b_count = b.cr.count();
        if (a_count != b_count) {
            return a_count < b_count;
        }

        /* TODO: give bonus if one is a 'caseless' character */
        ORDER_CHECK(offset);
        ORDER_CHECK(cr);
        return false;
    }

    CharReach cr = CharReach::dot(); /* default: worst case, all 256 chars */
    u32 offset = MAX_ACCEL_DEPTH + 1; /* default offset is out of range so any
                                       * real scheme compares as better */
};
}
217 
218 /**
219  * \brief Limit on the number of (recursive) calls to findBestInternal().
220  */
221 static constexpr size_t MAX_FINDBEST_CALLS = 1000000;
222 
223 static
findBestInternal(vector<vector<CharReach>>::const_iterator pb,vector<vector<CharReach>>::const_iterator pe,size_t * num_calls,const SAccelScheme & curr,SAccelScheme * best)224 void findBestInternal(vector<vector<CharReach>>::const_iterator pb,
225                       vector<vector<CharReach>>::const_iterator pe,
226                       size_t *num_calls, const SAccelScheme &curr,
227                       SAccelScheme *best) {
228     assert(curr.offset <= MAX_ACCEL_DEPTH);
229 
230     if (++(*num_calls) > MAX_FINDBEST_CALLS) {
231         DEBUG_PRINTF("hit num_calls limit %zu\n", *num_calls);
232         return;
233     }
234 
235     DEBUG_PRINTF("paths left %zu\n", pe - pb);
236     if (pb == pe) {
237         if (curr < *best) {
238             *best = curr;
239             DEBUG_PRINTF("new best: count=%zu, class=%s, offset=%u\n",
240                          best->cr.count(), describeClass(best->cr).c_str(),
241                          best->offset);
242         }
243         return;
244     }
245 
246     DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
247 
248     small_vector<SAccelScheme, 10> priority_path;
249     priority_path.reserve(pb->size());
250     u32 i = 0;
251     for (auto p = pb->begin(); p != pb->end(); ++p, i++) {
252         SAccelScheme as(*p | curr.cr, max(i, curr.offset));
253         if (*best < as) {
254             DEBUG_PRINTF("worse\n");
255             continue;
256         }
257         priority_path.push_back(move(as));
258     }
259 
260     sort(priority_path.begin(), priority_path.end());
261     for (auto it = priority_path.begin(); it != priority_path.end(); ++it) {
262         auto jt = next(it);
263         for (; jt != priority_path.end(); ++jt) {
264             if (!it->cr.isSubsetOf(jt->cr)) {
265                 break;
266             }
267         }
268         priority_path.erase(next(it), jt);
269         DEBUG_PRINTF("||%zu\n", it->cr.count());
270     }
271     DEBUG_PRINTF("---\n");
272 
273     for (const SAccelScheme &in : priority_path) {
274         DEBUG_PRINTF("in: count %zu\n", in.cr.count());
275         if (*best < in) {
276             DEBUG_PRINTF("worse\n");
277             continue;
278         }
279         findBestInternal(pb + 1, pe, num_calls, in, best);
280 
281         if (curr.cr == best->cr) {
282             return; /* could only get better by offset */
283         }
284     }
285 }
286 
287 static
findBest(const vector<vector<CharReach>> & paths,const CharReach & terminating)288 SAccelScheme findBest(const vector<vector<CharReach>> &paths,
289                       const CharReach &terminating) {
290     SAccelScheme curr(terminating, 0U);
291     SAccelScheme best;
292     size_t num_calls = 0;
293     findBestInternal(paths.begin(), paths.end(), &num_calls, curr, &best);
294     DEBUG_PRINTF("findBest completed, num_calls=%zu\n", num_calls);
295     DEBUG_PRINTF("selected scheme: count=%zu, class=%s, offset=%u\n",
296                  best.cr.count(), describeClass(best.cr).c_str(), best.offset);
297     return best;
298 }
299 
namespace {
/* Candidate double-byte acceleration scheme: an explicit set of 2-byte
 * literals ('double_byte') plus a residual single-byte reach ('double_cr'),
 * applied at byte offset 'double_offset'. */
struct DAccelScheme {
    DAccelScheme(CharReach cr_in, u32 offset_in)
        : double_cr(std::move(cr_in)), double_offset(offset_in) {
        assert(double_offset <= MAX_ACCEL_DEPTH);
    }

    /* Ordering: a smaller residual single-byte reach is strictly better;
     * among pair-only schemes, prefer ones usable as a masked
     * double-vermicelli (buildDvermMask); then fewer pairs, then smaller
     * offset. The remaining checks only provide a deterministic total order. */
    bool operator<(const DAccelScheme &b) const {
        const DAccelScheme &a = *this;

        size_t a_dcount = a.double_cr.count();
        size_t b_dcount = b.double_cr.count();

        assert(!a.double_byte.empty() || a_dcount || a.double_offset);
        assert(!b.double_byte.empty() || b_dcount || b.double_offset);

        if (a_dcount != b_dcount) {
            return a_dcount < b_dcount;
        }

        if (!a_dcount) {
            bool cd_a = buildDvermMask(a.double_byte);
            bool cd_b = buildDvermMask(b.double_byte);
            if (cd_a != cd_b) {
                return cd_a > cd_b;
            }
        }

        ORDER_CHECK(double_byte.size());
        ORDER_CHECK(double_offset);

        /* TODO: give bonus if one is a 'caseless' character */
        ORDER_CHECK(double_byte);
        ORDER_CHECK(double_cr);

        return false;
    }

    flat_set<pair<u8, u8>> double_byte; /* explicit 2-byte literals */
    CharReach double_cr; /* single-byte reach not covered by the pairs */
    u32 double_offset = 0; /* offset at which the scan applies */
};
}
343 
/* Extend scheme 'as' with the 2-byte literals formed by the cross product of
 * reach cr_1 (at offset_in) and cr_2_in (at offset_in + 1). Falls back to
 * widening the single-byte reach when the product would be too large. */
static
DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1,
                               const CharReach &cr_2_in, u32 offset_in) {
    /* characters already covered by the single-byte reach need no pairs */
    cr_1 &= ~as.double_cr;
    CharReach cr_2 = cr_2_in & ~as.double_cr;
    u32 offset = offset_in;

    if (cr_1.none()) {
        DEBUG_PRINTF("empty first element\n");
        ENSURE_AT_LEAST(&as.double_offset, offset);
        return as;
    }

    /* if the second element was reduced (or emptied), the pairs no longer
     * fully cover position offset_in + 1, so require one more byte of
     * back-off */
    if (cr_2_in != cr_2 || cr_2.none()) {
        offset = offset_in + 1;
    }

    size_t two_count = cr_1.count() * cr_2.count();

    DEBUG_PRINTF("will generate raw %zu pairs\n", two_count);

    if (!two_count) {
        DEBUG_PRINTF("empty element\n");
        ENSURE_AT_LEAST(&as.double_offset, offset);
        return as;
    }

    if (two_count > DOUBLE_SHUFTI_LIMIT) {
        /* too many pairs: absorb the narrower side into the single-byte
         * reach instead */
        if (cr_2.count() < cr_1.count()) {
            as.double_cr |= cr_2;
            offset = offset_in + 1;
        } else {
            as.double_cr |= cr_1;
        }
    } else {
        /* enumerate the full cross product of character pairs */
        for (auto i = cr_1.find_first(); i != CharReach::npos;
             i = cr_1.find_next(i)) {
            for (auto j = cr_2.find_first(); j != CharReach::npos;
                 j = cr_2.find_next(j)) {
                as.double_byte.emplace(i, j);
            }
        }
    }

    ENSURE_AT_LEAST(&as.double_offset, offset);
    DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n",
                 as.double_byte.size(), as.double_cr.count(), as.double_offset);
    return as;
}
393 
/* Depth-first search over the remaining paths [pb, pe) for the cheapest
 * double-byte acceleration scheme; the pair analogue of findBestInternal().
 * Candidate schemes for the current path are built from each adjacent pair
 * of segments. */
static
void findDoubleBest(vector<vector<CharReach> >::const_iterator pb,
              vector<vector<CharReach> >::const_iterator pe,
              const DAccelScheme &curr, DAccelScheme *best) {
    assert(curr.double_offset <= MAX_ACCEL_DEPTH);
    DEBUG_PRINTF("paths left %zu\n", pe - pb);
    DEBUG_PRINTF("current base: %zu pairs, %zu singles, offset %u\n",
                 curr.double_byte.size(), curr.double_cr.count(),
                 curr.double_offset);
    if (pb == pe) {
        /* all paths covered: keep this scheme if it beats the current best */
        if (curr < *best) {
            *best = curr;
            DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n",
                         best->double_byte.size(), best->double_cr.count(),
                         best->double_offset);
        }
        return;
    }

    DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());

    /* build a candidate from each adjacent segment pair along this path */
    small_vector<DAccelScheme, 10> priority_path;
    priority_path.reserve(pb->size());
    u32 i = 0;
    for (auto p = pb->begin(); p != pb->end() && next(p) != pb->end();
         ++p, i++) {
        DAccelScheme as = make_double_accel(curr, *p, *next(p), i);
        if (*best < as) { /* already worse than the best: prune */
            DEBUG_PRINTF("worse\n");
            continue;
        }
        priority_path.push_back(move(as));
    }

    /* explore cheapest candidates first */
    sort(priority_path.begin(), priority_path.end());
    DEBUG_PRINTF("%zu candidates for this path\n", priority_path.size());
    DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n",
                 best->double_byte.size(), best->double_cr.count(),
                 best->double_offset);

    for (const DAccelScheme &in : priority_path) {
        DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n",
                     in.double_byte.size(), in.double_cr.count(),
                     in.double_offset);
        if (*best < in) { /* may have been beaten by a sibling's recursion */
            DEBUG_PRINTF("worse\n");
            continue;
        }
        findDoubleBest(pb + 1, pe, in, best);
    }
}
445 
#ifdef DEBUG
/* Debug-only helper: print each path as a list of character classes. */
static
void dumpPaths(const vector<vector<CharReach>> &paths) {
    for (const auto &segments : paths) {
        DEBUG_PRINTF("path: [");
        for (const auto &seg : segments) {
            printf(" [");
            describeClass(stdout, seg, 20, CC_OUT_TEXT);
            printf("]");
        }
        printf(" ]\n");
    }
}
#endif
460 
461 static
blowoutPathsLessStrictSegment(vector<vector<CharReach>> & paths)462 void blowoutPathsLessStrictSegment(vector<vector<CharReach> > &paths) {
463     /* paths segments which are a superset of an earlier segment should never be
464      * picked as an acceleration segment -> to improve processing just replace
465      * with dot */
466     for (auto &p : paths) {
467         for (auto it = p.begin(); it != p.end();  ++it) {
468             for (auto jt = next(it); jt != p.end(); ++jt) {
469                 if (it->isSubsetOf(*jt)) {
470                     *jt = CharReach::dot();
471                 }
472             }
473         }
474     }
475 }
476 
477 static
unifyPathsLastSegment(vector<vector<CharReach>> & paths)478 void unifyPathsLastSegment(vector<vector<CharReach> > &paths) {
479     /* try to unify paths which only differ in the last segment */
480     for (vector<vector<CharReach> >::iterator p = paths.begin();
481          p != paths.end() && p + 1 != paths.end();) {
482         vector<CharReach> &a = *p;
483         vector<CharReach> &b = *(p + 1);
484 
485         if (a.size() != b.size()) {
486             ++p;
487             continue;
488         }
489 
490         u32 i = 0;
491         for (; i < a.size() - 1; i++) {
492             if (a[i] != b[i]) {
493                 break;
494             }
495         }
496         if (i == a.size() - 1) {
497             /* we can unify these paths */
498             a[i] |= b[i];
499             paths.erase(p + 1);
500         } else {
501             ++p;
502         }
503     }
504 }
505 
506 static
improvePaths(vector<vector<CharReach>> & paths)507 void improvePaths(vector<vector<CharReach> > &paths) {
508 #ifdef DEBUG
509     DEBUG_PRINTF("orig paths\n");
510     dumpPaths(paths);
511 #endif
512     blowoutPathsLessStrictSegment(paths);
513 
514     sort(paths.begin(), paths.end());
515 
516     unifyPathsLastSegment(paths);
517 
518 #ifdef DEBUG
519     DEBUG_PRINTF("opt paths\n");
520     dumpPaths(paths);
521 #endif
522 }
523 
#define MAX_DOUBLE_ACCEL_PATHS 10

/* Search the path set for the best double-byte acceleration scheme. Falls
 * back to a scheme consisting solely of the terminating reach (offset 0)
 * when the paths offer nothing usable. */
static
DAccelScheme findBestDoubleAccelScheme(vector<vector<CharReach> > paths,
                                       const CharReach &terminating) {
    DEBUG_PRINTF("looking for double accel, %zu terminating symbols\n",
                 terminating.count());
    unifyPathsLastSegment(paths);

#ifdef DEBUG
    DEBUG_PRINTF("paths:\n");
    dumpPaths(paths);
#endif

    /* if there are too many paths, shorten the paths to reduce the number of
     * distinct paths we have to consider */
    while (paths.size() > MAX_DOUBLE_ACCEL_PATHS) {
        for (auto &p : paths) {
            if (p.empty()) {
                /* a path with no segments left cannot support pairs */
                return DAccelScheme(terminating, 0U);
            }
            p.pop_back();
        }
        unifyPathsLastSegment(paths);
    }

    if (paths.empty()) {
        return DAccelScheme(terminating, 0U);
    }

    DAccelScheme curr(terminating, 0U);
    /* seed 'best' with the worst valid scheme (all chars as singles) so any
     * real candidate beats it */
    DAccelScheme best(CharReach::dot(), 0U);
    findDoubleBest(paths.begin(), paths.end(), curr, &best);
    DEBUG_PRINTF("da %zu pairs, %zu singles\n", best.double_byte.size(),
                 best.double_cr.count());
    return best;
}
561 
562 #define MAX_EXPLORE_PATHS 40
563 
findBestAccelScheme(vector<vector<CharReach>> paths,const CharReach & terminating,bool look_for_double_byte)564 AccelScheme findBestAccelScheme(vector<vector<CharReach>> paths,
565                                 const CharReach &terminating,
566                                 bool look_for_double_byte) {
567     AccelScheme rv;
568     if (look_for_double_byte) {
569         DAccelScheme da = findBestDoubleAccelScheme(paths, terminating);
570         if (da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT) {
571             rv.double_byte = std::move(da.double_byte);
572             rv.double_cr = move(da.double_cr);
573             rv.double_offset = da.double_offset;
574         }
575     }
576 
577     improvePaths(paths);
578 
579     DEBUG_PRINTF("we have %zu paths\n", paths.size());
580     if (paths.size() > MAX_EXPLORE_PATHS) {
581         return rv; /* too many paths to explore */
582     }
583 
584     /* if we were smart we would do something netflowy on the paths to find the
585      * best cut. But we aren't, so we will just brute force it.
586      */
587     SAccelScheme best = findBest(paths, terminating);
588 
589     /* find best is a bit lazy in terms of minimising the offset, see if we can
590      * make it better. need to find the min max offset that we need.*/
591     u32 offset = 0;
592     for (const auto &path : paths) {
593         u32 i = 0;
594         for (const auto &cr : path) {
595             if (cr.isSubsetOf(best.cr)) {
596                 break;
597             }
598             i++;
599         }
600         offset = MAX(offset, i);
601     }
602     assert(offset <= best.offset);
603     best.offset = offset;
604 
605     rv.offset = best.offset;
606     rv.cr = best.cr;
607     if (rv.cr.count() < rv.double_cr.count()) {
608         rv.double_byte.clear();
609     }
610 
611     return rv;
612 }
613 
/* Attempt to build an acceleration scheme for the set of vertices 'verts'
 * treated as a single accelerable super-state. All vertices must have self
 * loops; returns a default-constructed (invalid) AccelScheme on failure. */
AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
                         const vector<CharReach> &refined_cr,
                         const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
                         bool allow_wide, bool look_for_double_byte) {
    /* 'terminating' accumulates the characters on which some member of the
     * set switches off */
    CharReach terminating;
    for (auto v : verts) {
        if (!hasSelfLoop(v, g)) {
            DEBUG_PRINTF("no self loop\n");
            return AccelScheme(); /* invalid scheme */
        }

        // check that this state is reachable on most characters
        terminating |= ~g[v].char_reach;
    }

    DEBUG_PRINTF("set vertex has %zu stop chars\n", terminating.count());
    size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
                              : ACCEL_MAX_STOP_CHAR;
    if (terminating.count() > limit) {
        return AccelScheme(); /* invalid scheme */
    }

    vector<vector<CharReach>> paths;
    /* paths looping straight back into the accelerated set can be ignored */
    flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end());

    /* Note: we can not in general (TODO: ignore when possible) ignore entries
     * into the bounded repeat cyclic states as that is when the magic happens
     */
    for (auto v : br_cyclic | map_keys) {
        /* TODO: can allow if repeatMin <= 1 ? */
        ignore_vert_set.erase(v);
    }

    /* gather reach paths out of each vertex's non-self successors */
    for (auto v : verts) {
        for (auto w : adjacent_vertices_range(v, g)) {
            if (w != v) {
                findPaths(g, w, refined_cr, &paths, ignore_vert_set,
                          MAX_ACCEL_DEPTH);
            }
        }
    }

    /* paths built wrong: reverse them */
    for (auto &path : paths) {
        reverse(path.begin(), path.end());
    }

    return findBestAccelScheme(std::move(paths), terminating,
                               look_for_double_byte);
}
664 
get_sds_or_proxy(const NGHolder & g)665 NFAVertex get_sds_or_proxy(const NGHolder &g) {
666     DEBUG_PRINTF("looking for sds proxy\n");
667     if (proper_out_degree(g.startDs, g)) {
668         return g.startDs;
669     }
670 
671     NFAVertex v = NGHolder::null_vertex();
672     for (auto w : adjacent_vertices_range(g.start, g)) {
673         if (w != g.startDs) {
674             if (!v) {
675                 v = w;
676             } else {
677                 return g.startDs;
678             }
679         }
680     }
681 
682     if (!v) {
683         return g.startDs;
684     }
685 
686     while (true) {
687         if (hasSelfLoop(v, g)) {
688             DEBUG_PRINTF("woot %zu\n", g[v].index);
689             return v;
690         }
691         if (out_degree(v, g) != 1) {
692             break;
693         }
694         NFAVertex u = getSoleDestVertex(g, v);
695         if (!g[u].char_reach.all()) {
696             break;
697         }
698         v = u;
699     }
700 
701     return g.startDs;
702 }
703 
/** \brief Check if vertex \a v is an accelerable state (for a limex NFA).
 * On success, writes the chosen acceleration scheme to \a as and returns
 * true. */
bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
                   const vector<CharReach> &refined_cr,
                   const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
                   AccelScheme *as, bool allow_wide) {
    // For a state to be accelerable, our current criterion is that it be a
    // large character class with a self-loop and narrow set of possible other
    // successors (i.e. no special successors, union of successor reachability
    // is small).
    if (!hasSelfLoop(v, g)) {
        return false;
    }

    // check that this state is reachable on most characters
    /* we want to use the maximal reach here (in the graph) */
    CharReach terminating = g[v].char_reach;
    terminating.flip();

    DEBUG_PRINTF("vertex %zu is cyclic and has %zu stop chars%s\n",
                 g[v].index, terminating.count(),
                 allow_wide ? " (w)" : "");

    size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
                              : ACCEL_MAX_STOP_CHAR;
    if (terminating.count() > limit) {
        DEBUG_PRINTF("too leaky\n");
        return false;
    }

    flat_set<NFAVertex> curr, next;

    insert(&curr, adjacent_vertices(v, g));
    curr.erase(v); // erase self-loop

    // We consider offsets of zero through three; this is fairly arbitrary at
    // present and could probably be increased (FIXME)
    /* WARNING: would/could do horrible things to compile time */
    bool stop = false;
    /* depthReach[d] = union of successor reach at distance d from v */
    vector<CharReach> depthReach(MAX_ACCEL_DEPTH);
    unsigned int depth;
    for (depth = 0; !stop && depth < MAX_ACCEL_DEPTH; depth++) {
        CharReach &cr = depthReach[depth];
        for (auto t : curr) {
            if (is_special(t, g)) {
                // We've bumped into the edge of the graph, so we should stop
                // searching.
                // Exception: iff our cyclic state is not a dot, then we can
                // safely accelerate towards an EOD accept.

                /* Exception: nfas that don't generate callbacks so accepts are
                 * fine too */
                if (t == g.accept && !generates_callbacks(g)) {
                    stop = true; // don't search beyond this depth
                    continue;
                } else if (t == g.accept) {
                    goto depth_done;
                }

                assert(t == g.acceptEod);
                stop = true; // don't search beyond this depth
            } else {
                // Non-special vertex
                insert(&next, adjacent_vertices(t, g));
                /* for the escape 'literals' we want to use the minimal cr so we
                 * can be more selective */
                cr |= refined_cr[g[t].index];
            }
        }

        /* the stop chars are always live at every depth */
        cr |= terminating;
        DEBUG_PRINTF("depth %u has unioned reach %zu\n", depth, cr.count());

        curr.swap(next);
        next.clear();
    }

depth_done:

    if (depth == 0) {
        return false;
    }

    DEBUG_PRINTF("selecting from depth 0..%u\n", depth);

    /* Look for the most awesome acceleration evar */
    for (unsigned int i = 0; i < depth; i++) {
        if (depthReach[i].none()) {
            /* empty reach at this depth: nothing can be live, so we can skip
             * ahead unconditionally */
            DEBUG_PRINTF("red tape acceleration engine depth %u\n", i);
            *as = AccelScheme();
            as->offset = i;
            as->cr = CharReach();
            return true;
        }
    }

    // First, loop over our depths and see if we have a suitable 2-byte
    // caseful vermicelli option: this is the (second) fastest accel we have
    if (depth > 1) {
        for (unsigned int i = 0; i < (depth - 1); i++) {
            const CharReach &cra = depthReach[i];
            const CharReach &crb = depthReach[i + 1];
            if ((cra.count() == 1 && crb.count() == 1)
                || (cra.count() == 2 && crb.count() == 2
                    && cra.isBit5Insensitive() && crb.isBit5Insensitive())) {
                DEBUG_PRINTF("two-byte vermicelli, depth %u\n", i);
                *as = AccelScheme();
                as->offset = i;
                return true;
            }
        }
    }

    // Second option: a two-byte shufti (i.e. less than eight 2-byte
    // literals)
    if (depth > 1) {
        for (unsigned int i = 0; i < (depth - 1); i++) {
            if (depthReach[i].count() * depthReach[i+1].count()
                <= DOUBLE_SHUFTI_LIMIT) {
                DEBUG_PRINTF("two-byte shufti, depth %u\n", i);
                *as = AccelScheme();
                as->offset = i;
                return true;
            }
        }
    }

    // Look for offset accel schemes verm/shufti;
    vector<NFAVertex> verts(1, v);
    *as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide, true);
    DEBUG_PRINTF("as width %zu\n", as->cr.count());
    return as->cr.count() <= ACCEL_MAX_STOP_CHAR || allow_wide;
}
836 
837 } // namespace ue2
838