1 /*
2  * Copyright (c) 2015-2017, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "accel_dfa_build_strat.h"
30 
31 #include "accel.h"
32 #include "grey.h"
33 #include "nfagraph/ng_limex_accel.h"
34 #include "shufticompile.h"
35 #include "trufflecompile.h"
36 #include "util/accel_scheme.h"
37 #include "util/charreach.h"
38 #include "util/container.h"
39 #include "util/dump_charclass.h"
40 #include "util/small_vector.h"
41 #include "util/verify_types.h"
42 
43 #include <sstream>
44 #include <unordered_map>
45 #include <unordered_set>
46 #include <vector>
47 
48 #define PATHS_LIMIT 500
49 
50 using namespace std;
51 
52 namespace ue2 {
53 
54 namespace {
55 struct path {
56     small_vector<CharReach, MAX_ACCEL_DEPTH + 1> reach;
57     dstate_id_t dest = DEAD_STATE;
pathue2::__anonfd77354d0111::path58     explicit path(dstate_id_t base) : dest(base) {}
59 };
60 };
61 
62 template<typename Container>
dump_paths(const Container & paths)63 void dump_paths(const Container &paths) {
64     for (UNUSED const path &p : paths) {
65         DEBUG_PRINTF("[%s] -> %u\n", describeClasses(p.reach).c_str(), p.dest);
66     }
67     DEBUG_PRINTF("%zu paths\n", paths.size());
68 }
69 
70 static
reverse_alpha_remapping(const raw_dfa & rdfa)71 vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
72     vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */
73 
74     for (u32 i = 0; i < N_CHARS; i++) {
75         rv.at(rdfa.alpha_remap[i]).set(i);
76     }
77 
78     return rv;
79 }
80 
81 static
is_useful_path(const vector<path> & good,const path & p)82 bool is_useful_path(const vector<path> &good, const path &p) {
83     for (const auto &g : good) {
84         assert(g.dest == p.dest);
85         assert(g.reach.size() <= p.reach.size());
86         auto git = g.reach.rbegin();
87         auto pit = p.reach.rbegin();
88 
89         for (; git != g.reach.rend(); ++git, ++pit) {
90             if (!pit->isSubsetOf(*git)) {
91                 goto next;
92             }
93         }
94         DEBUG_PRINTF("better: [%s] -> %u\n", describeClasses(g.reach).c_str(),
95                      g.dest);
96 
97         return false;
98     next:;
99     }
100 
101     return true;
102 }
103 
104 static
append(const path & orig,const CharReach & cr,u32 new_dest)105 path append(const path &orig, const CharReach &cr, u32 new_dest) {
106     path p(new_dest);
107     p.reach = orig.reach;
108     p.reach.push_back(cr);
109 
110     return p;
111 }
112 
113 static
extend(const raw_dfa & rdfa,const vector<CharReach> & rev_map,const path & p,unordered_map<u32,vector<path>> & all,vector<path> & out)114 void extend(const raw_dfa &rdfa, const vector<CharReach> &rev_map,
115             const path &p, unordered_map<u32, vector<path>> &all,
116             vector<path> &out) {
117     const dstate &s = rdfa.states[p.dest];
118 
119     if (!p.reach.empty() && p.reach.back().none()) {
120         out.push_back(p);
121         return;
122     }
123 
124     if (!s.reports.empty()) {
125         if (generates_callbacks(rdfa.kind)) {
126             out.push_back(p);
127             return;
128         } else {
129             path pp = append(p, CharReach(), p.dest);
130             all[p.dest].push_back(pp);
131             out.push_back(move(pp));
132         }
133     }
134 
135     if (!s.reports_eod.empty()) {
136         path pp = append(p, CharReach(), p.dest);
137         all[p.dest].push_back(pp);
138         out.push_back(move(pp));
139     }
140 
141     flat_map<u32, CharReach> dest;
142     for (u32 i = 0; i < rev_map.size(); i++) {
143         u32 succ = s.next[i];
144         dest[succ] |= rev_map[i];
145     }
146 
147     for (const auto &e : dest) {
148         path pp = append(p, e.second, e.first);
149         if (!is_useful_path(all[e.first], pp)) {
150             DEBUG_PRINTF("not useful: [%s] -> %u\n",
151                          describeClasses(pp.reach).c_str(), pp.dest);
152             continue;
153         }
154 
155         DEBUG_PRINTF("----good: [%s] -> %u\n",
156                      describeClasses(pp.reach).c_str(), pp.dest);
157         all[e.first].push_back(pp);
158         out.push_back(move(pp));
159     }
160 }
161 
162 static
generate_paths(const raw_dfa & rdfa,dstate_id_t base,u32 len)163 vector<vector<CharReach>> generate_paths(const raw_dfa &rdfa,
164                                          dstate_id_t base, u32 len) {
165     const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa);
166     vector<path> paths{path(base)};
167     unordered_map<u32, vector<path>> all;
168     all[base].push_back(path(base));
169     for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) {
170         vector<path> next_gen;
171         for (const auto &p : paths) {
172             extend(rdfa, rev_map, p, all, next_gen);
173         }
174 
175         paths = move(next_gen);
176     }
177 
178     dump_paths(paths);
179 
180     vector<vector<CharReach>> rv;
181     rv.reserve(paths.size());
182     for (auto &p : paths) {
183         rv.push_back(vector<CharReach>(std::make_move_iterator(p.reach.begin()),
184                                        std::make_move_iterator(p.reach.end())));
185     }
186     return rv;
187 }
188 
189 static
look_for_offset_accel(const raw_dfa & rdfa,dstate_id_t base,u32 max_allowed_accel_offset)190 AccelScheme look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
191                                   u32 max_allowed_accel_offset) {
192     DEBUG_PRINTF("looking for accel for %hu\n", base);
193     vector<vector<CharReach>> paths =
194         generate_paths(rdfa, base, max_allowed_accel_offset + 1);
195     AccelScheme as = findBestAccelScheme(paths, CharReach(), true);
196     DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset);
197     return as;
198 }
199 
200 static UNUSED
better(const AccelScheme & a,const AccelScheme & b)201 bool better(const AccelScheme &a, const AccelScheme &b) {
202     if (!a.double_byte.empty() && b.double_byte.empty()) {
203         return true;
204     }
205 
206     if (!b.double_byte.empty()) {
207         return false;
208     }
209 
210     return a.cr.count() < b.cr.count();
211 }
212 
213 static
double_byte_ok(const AccelScheme & info)214 bool double_byte_ok(const AccelScheme &info) {
215     return !info.double_byte.empty() &&
216            info.double_cr.count() < info.double_byte.size() &&
217            info.double_cr.count() <= 2;
218 }
219 
220 static
has_self_loop(dstate_id_t s,const raw_dfa & raw)221 bool has_self_loop(dstate_id_t s, const raw_dfa &raw) {
222     u16 top_remap = raw.alpha_remap[TOP];
223     for (u32 i = 0; i < raw.states[s].next.size(); i++) {
224         if (i != top_remap && raw.states[s].next[i] == s) {
225             return true;
226         }
227     }
228     return false;
229 }
230 
231 static
find_nonexit_symbols(const raw_dfa & rdfa,const CharReach & escape)232 flat_set<u16> find_nonexit_symbols(const raw_dfa &rdfa,
233                                    const CharReach &escape) {
234     flat_set<u16> rv;
235     CharReach nonexit = ~escape;
236     for (auto i = nonexit.find_first(); i != nonexit.npos;
237          i = nonexit.find_next(i)) {
238         rv.insert(rdfa.alpha_remap[i]);
239     }
240 
241     return rv;
242 }
243 
244 static
get_sds_or_proxy(const raw_dfa & raw)245 dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
246     if (raw.start_floating != DEAD_STATE) {
247         DEBUG_PRINTF("has floating start\n");
248         return raw.start_floating;
249     }
250 
251     DEBUG_PRINTF("looking for SDS proxy\n");
252 
253     dstate_id_t s = raw.start_anchored;
254 
255     if (has_self_loop(s, raw)) {
256         return s;
257     }
258 
259     u16 top_remap = raw.alpha_remap[TOP];
260 
261     std::unordered_set<dstate_id_t> seen;
262     while (true) {
263         seen.insert(s);
264         DEBUG_PRINTF("basis %hu\n", s);
265 
266         /* check if we are connected to a state with a self loop */
267         for (u32 i = 0; i < raw.states[s].next.size(); i++) {
268             dstate_id_t t = raw.states[s].next[i];
269             if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) {
270                 return t;
271             }
272         }
273 
274         /* find a neighbour to use as a basis for looking for the sds proxy */
275         dstate_id_t t = DEAD_STATE;
276         for (u32 i = 0; i < raw.states[s].next.size(); i++) {
277             dstate_id_t tt = raw.states[s].next[i];
278             if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) {
279                 t = tt;
280                 break;
281             }
282         }
283 
284         if (t == DEAD_STATE) {
285             /* we were unable to find a state to use as a SDS proxy */
286             return DEAD_STATE;
287         }
288 
289         s = t;
290     }
291 }
292 
293 static
find_region(const raw_dfa & rdfa,dstate_id_t base,const AccelScheme & ei)294 set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
295                              const AccelScheme &ei) {
296     DEBUG_PRINTF("looking for region around %hu\n", base);
297 
298     set<dstate_id_t> region = {base};
299 
300     if (!ei.double_byte.empty()) {
301         return region;
302     }
303 
304     DEBUG_PRINTF("accel %s+%u\n", describeClass(ei.cr).c_str(), ei.offset);
305 
306     const CharReach &escape = ei.cr;
307     auto nonexit_symbols = find_nonexit_symbols(rdfa, escape);
308 
309     vector<dstate_id_t> pending = {base};
310     while (!pending.empty()) {
311         dstate_id_t curr = pending.back();
312         pending.pop_back();
313         for (auto s : nonexit_symbols) {
314             dstate_id_t t = rdfa.states[curr].next[s];
315             if (contains(region, t)) {
316                 continue;
317             }
318 
319             DEBUG_PRINTF("    %hu is in region\n", t);
320             region.insert(t);
321             pending.push_back(t);
322         }
323     }
324 
325     return region;
326 }
327 
328 AccelScheme
find_escape_strings(dstate_id_t this_idx) const329 accel_dfa_build_strat::find_escape_strings(dstate_id_t this_idx) const {
330     AccelScheme rv;
331     const raw_dfa &rdfa = get_raw();
332     rv.cr.clear();
333     rv.offset = 0;
334     const dstate &raw = rdfa.states[this_idx];
335     const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa);
336     bool outs2_broken = false;
337     flat_map<dstate_id_t, CharReach> succs;
338 
339     for (u32 i = 0; i < rev_map.size(); i++) {
340         if (raw.next[i] == this_idx) {
341             continue;
342         }
343 
344         const CharReach &cr_i = rev_map.at(i);
345 
346         rv.cr |= cr_i;
347         dstate_id_t next_id = raw.next[i];
348 
349         DEBUG_PRINTF("next is %hu\n", next_id);
350         const dstate &raw_next = rdfa.states[next_id];
351 
352         if (outs2_broken) {
353             continue;
354         }
355 
356         if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) {
357             DEBUG_PRINTF("leads to report\n");
358             outs2_broken = true; /* cannot accelerate over reports */
359             continue;
360         }
361         succs[next_id] |= cr_i;
362     }
363 
364     if (!outs2_broken) {
365         for (const auto &e : succs) {
366             const CharReach &cr_i = e.second;
367             const dstate &raw_next = rdfa.states[e.first];
368 
369             CharReach cr_all_j;
370             for (u32 j = 0; j < rev_map.size(); j++) {
371                 if (raw_next.next[j] == raw.next[j]) {
372                     continue;
373                 }
374 
375                 DEBUG_PRINTF("state %hu: adding sym %u -> %hu to 2 \n", e.first,
376                              j, raw_next.next[j]);
377                 cr_all_j |= rev_map.at(j);
378             }
379 
380             if (cr_i.count() * cr_all_j.count() > 8) {
381                 DEBUG_PRINTF("adding %zu to double_cr\n", cr_i.count());
382                 rv.double_cr |= cr_i;
383             } else {
384                 for (auto ii = cr_i.find_first(); ii != CharReach::npos;
385                      ii = cr_i.find_next(ii)) {
386                     for (auto jj = cr_all_j.find_first(); jj != CharReach::npos;
387                          jj = cr_all_j.find_next(jj)) {
388                         rv.double_byte.emplace((u8)ii, (u8)jj);
389                         if (rv.double_byte.size() > 8) {
390                             DEBUG_PRINTF("outs2 too big\n");
391                             outs2_broken = true;
392                             goto done;
393                         }
394                     }
395                 }
396             }
397         }
398 
399     done:
400         assert(outs2_broken || rv.double_byte.size() <= 8);
401         if (outs2_broken) {
402             rv.double_byte.clear();
403         }
404     }
405 
406     DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa));
407     DEBUG_PRINTF("broken %d\n", outs2_broken);
408     if (!double_byte_ok(rv) && !is_triggered(rdfa.kind) &&
409         this_idx == rdfa.start_floating && this_idx != DEAD_STATE) {
410         DEBUG_PRINTF("looking for offset accel at %u\n", this_idx);
411         auto offset =
412             look_for_offset_accel(rdfa, this_idx, max_allowed_offset_accel());
413         DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(), rv.cr.count());
414         if (double_byte_ok(offset) || offset.cr.count() < rv.cr.count()) {
415             DEBUG_PRINTF("using offset accel\n");
416             rv = offset;
417         }
418     }
419 
420     return rv;
421 }
422 
423 void
buildAccel(UNUSED dstate_id_t this_idx,const AccelScheme & info,void * accel_out)424 accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
425                                   const AccelScheme &info,
426                                   void *accel_out) {
427     AccelAux *accel = (AccelAux *)accel_out;
428 
429     DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset,
430                  info.double_offset);
431     accel->generic.offset = verify_u8(info.offset);
432 
433     if (double_byte_ok(info) && info.double_cr.none() &&
434         info.double_byte.size() == 1) {
435         accel->accel_type = ACCEL_DVERM;
436         accel->dverm.c1 = info.double_byte.begin()->first;
437         accel->dverm.c2 = info.double_byte.begin()->second;
438         accel->dverm.offset = verify_u8(info.double_offset);
439         DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx);
440         return;
441     }
442 
443     if (double_byte_ok(info) && info.double_cr.none() &&
444         (info.double_byte.size() == 2 || info.double_byte.size() == 4)) {
445         bool ok = true;
446 
447         assert(!info.double_byte.empty());
448         u8 firstC = info.double_byte.begin()->first & CASE_CLEAR;
449         u8 secondC = info.double_byte.begin()->second & CASE_CLEAR;
450 
451         for (const pair<u8, u8> &p : info.double_byte) {
452             if ((p.first & CASE_CLEAR) != firstC ||
453                 (p.second & CASE_CLEAR) != secondC) {
454                 ok = false;
455                 break;
456             }
457         }
458 
459         if (ok) {
460             accel->accel_type = ACCEL_DVERM_NOCASE;
461             accel->dverm.c1 = firstC;
462             accel->dverm.c2 = secondC;
463             accel->dverm.offset = verify_u8(info.double_offset);
464             DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx);
465             return;
466         }
467 
468         u8 m1;
469         u8 m2;
470         if (buildDvermMask(info.double_byte, &m1, &m2)) {
471             accel->accel_type = ACCEL_DVERM_MASKED;
472             accel->dverm.offset = verify_u8(info.double_offset);
473             accel->dverm.c1 = info.double_byte.begin()->first & m1;
474             accel->dverm.c2 = info.double_byte.begin()->second & m2;
475             accel->dverm.m1 = m1;
476             accel->dverm.m2 = m2;
477             DEBUG_PRINTF(
478                 "building maskeddouble-vermicelli for 0x%02hhx%02hhx\n",
479                 accel->dverm.c1, accel->dverm.c2);
480             return;
481         }
482     }
483 
484     if (double_byte_ok(info) &&
485         shuftiBuildDoubleMasks(
486             info.double_cr, info.double_byte, (u8 *)&accel->dshufti.lo1,
487             (u8 *)&accel->dshufti.hi1, (u8 *)&accel->dshufti.lo2,
488             (u8 *)&accel->dshufti.hi2)) {
489         accel->accel_type = ACCEL_DSHUFTI;
490         accel->dshufti.offset = verify_u8(info.double_offset);
491         DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
492         return;
493     }
494 
495     if (info.cr.none()) {
496         accel->accel_type = ACCEL_RED_TAPE;
497         DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape"
498                      " from which there is no escape\n",
499                      this_idx);
500         return;
501     }
502 
503     if (info.cr.count() == 1) {
504         accel->accel_type = ACCEL_VERM;
505         accel->verm.c = info.cr.find_first();
506         DEBUG_PRINTF("state %hu is vermicelli\n", this_idx);
507         return;
508     }
509 
510     if (info.cr.count() == 2 && info.cr.isCaselessChar()) {
511         accel->accel_type = ACCEL_VERM_NOCASE;
512         accel->verm.c = info.cr.find_first() & CASE_CLEAR;
513         DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx);
514         return;
515     }
516 
517     if (info.cr.count() > max_floating_stop_char()) {
518         accel->accel_type = ACCEL_NONE;
519         DEBUG_PRINTF("state %hu is too broad\n", this_idx);
520         return;
521     }
522 
523     accel->accel_type = ACCEL_SHUFTI;
524     if (-1 != shuftiBuildMasks(info.cr, (u8 *)&accel->shufti.lo,
525                                (u8 *)&accel->shufti.hi)) {
526         DEBUG_PRINTF("state %hu is shufti\n", this_idx);
527         return;
528     }
529 
530     assert(!info.cr.none());
531     accel->accel_type = ACCEL_TRUFFLE;
532     truffleBuildMasks(info.cr, (u8 *)&accel->truffle.mask1,
533                       (u8 *)&accel->truffle.mask2);
534     DEBUG_PRINTF("state %hu is truffle\n", this_idx);
535 }
536 
537 map<dstate_id_t, AccelScheme>
getAccelInfo(const Grey & grey)538 accel_dfa_build_strat::getAccelInfo(const Grey &grey) {
539     map<dstate_id_t, AccelScheme> rv;
540     raw_dfa &rdfa = get_raw();
541     if (!grey.accelerateDFA) {
542         return rv;
543     }
544 
545     dstate_id_t sds_proxy = get_sds_or_proxy(rdfa);
546     DEBUG_PRINTF("sds %hu\n", sds_proxy);
547 
548     /* Find accel info for a single state. */
549     auto do_state = [&](size_t i) {
550         if (i == DEAD_STATE) {
551             return;
552         }
553 
554         /* Note on report acceleration states: While we can't accelerate while
555          * we are spamming out callbacks, the QR code paths don't raise reports
556          * during scanning so they can accelerate report states. */
557         if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) {
558             return;
559         }
560 
561         size_t single_limit =
562             i == sds_proxy ? max_floating_stop_char() : max_stop_char();
563         DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit);
564 
565         AccelScheme ei = find_escape_strings(i);
566         if (ei.cr.count() > single_limit) {
567             DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i,
568                          ei.cr.count());
569             return;
570         }
571 
572         DEBUG_PRINTF("state %zu should be accelerable %zu\n", i, ei.cr.count());
573 
574         rv[i] = ei;
575     };
576 
577     if (only_accel_init) {
578         DEBUG_PRINTF("only computing accel for init states\n");
579         do_state(rdfa.start_anchored);
580         if (rdfa.start_floating != rdfa.start_anchored) {
581             do_state(rdfa.start_floating);
582         }
583     } else {
584         DEBUG_PRINTF("computing accel for all states\n");
585         for (size_t i = 0; i < rdfa.states.size(); i++) {
586             do_state(i);
587         }
588     }
589 
590     /* provide acceleration states to states in the region of sds */
591     if (contains(rv, sds_proxy)) {
592         AccelScheme sds_ei = rv[sds_proxy];
593         sds_ei.double_byte.clear(); /* region based on single byte scheme
594                                      * may differ from double byte */
595         DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n",
596                      sds_ei.cr.count());
597         auto sds_region = find_region(rdfa, sds_proxy, sds_ei);
598         for (auto s : sds_region) {
599             if (!contains(rv, s) || better(sds_ei, rv[s])) {
600                 rv[s] = sds_ei;
601             }
602         }
603     }
604 
605     return rv;
606 }
607 };
608