1 /******************************************************************************\
2 * Copyright (c) 2016, Robert van Engelen, Genivia Inc. All rights reserved.    *
3 *                                                                              *
4 * Redistribution and use in source and binary forms, with or without           *
5 * modification, are permitted provided that the following conditions are met:  *
6 *                                                                              *
7 *   (1) Redistributions of source code must retain the above copyright notice, *
8 *       this list of conditions and the following disclaimer.                  *
9 *                                                                              *
10 *   (2) Redistributions in binary form must reproduce the above copyright      *
11 *       notice, this list of conditions and the following disclaimer in the    *
12 *       documentation and/or other materials provided with the distribution.   *
13 *                                                                              *
14 *   (3) The name of the author may not be used to endorse or promote products  *
15 *       derived from this software without specific prior written permission.  *
16 *                                                                              *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED *
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF         *
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO   *
20 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,       *
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;  *
23 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,     *
24 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR      *
25 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF       *
26 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                   *
27 \******************************************************************************/
28 
29 /**
30 @file      matcher.cpp
31 @brief     RE/flex matcher engine
32 @author    Robert van Engelen - engelen@genivia.com
33 @copyright (c) 2016-2020, Robert van Engelen, Genivia Inc. All rights reserved.
34 @copyright (c) BSD-3 License - see LICENSE.txt
35 */
36 
37 #include <reflex/matcher.h>
38 
39 namespace reflex {
40 
41 /// Boyer-Moore preprocessing of the given pattern prefix pat of length len (<=255), generates bmd_ > 0 and bms_[] shifts.
boyer_moore_init(const char * pat,size_t len)42 void Matcher::boyer_moore_init(const char *pat, size_t len)
43 {
44   // Relative frequency table of English letters, source code, and UTF-8 bytes
45   static unsigned char freq[256] = "\0\0\0\0\0\0\0\0\0\73\4\0\0\4\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\73\70\70\1\1\2\2\70\70\70\2\2\70\70\70\2\3\3\3\3\3\3\3\3\3\3\70\70\70\70\70\70\2\35\14\24\26\37\20\17\30\33\11\12\25\22\32\34\15\7\27\31\36\23\13\21\10\16\6\70\1\70\2\70\1\67\46\56\60\72\52\51\62\65\43\44\57\54\64\66\47\41\61\63\71\55\45\53\42\50\40\70\2\70\2\0\47\47\47\47\47\47\47\47\47\47\47\47\47\47\47\47\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\44\44\44\44\44\44\44\44\44\44\44\44\44\44\44\44\0\0\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\46\56\56\56\56\56\56\56\56\56\56\56\56\46\56\56\73\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
46   uint8_t n = static_cast<uint8_t>(len); // okay to cast: actually never more than 255
47   uint16_t i;
48   for (i = 0; i < 256; ++i)
49     bms_[i] = n;
50   lcp_ = 0;
51   lcs_ = n > 1;
52   for (i = 0; i < n; ++i)
53   {
54     uint8_t pch = static_cast<uint8_t>(pat[i]);
55     bms_[pch] = static_cast<uint8_t>(n - i - 1);
56     if (i > 0)
57     {
58       if (freq[static_cast<uint8_t>(pat[lcp_])] > freq[pch])
59       {
60         lcs_ = lcp_;
61         lcp_ = i;
62       }
63       else if (freq[static_cast<uint8_t>(pat[lcs_])] > freq[pch])
64       {
65         lcs_ = i;
66       }
67     }
68   }
69   uint16_t j;
70   for (i = n - 1, j = i; j > 0; --j)
71     if (pat[j - 1] == pat[i])
72       break;
73   bmd_ = i - j + 1;
74 #if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) || defined(__SSE2__) || defined(__x86_64__) || _M_IX86_FP == 2
75   size_t score = 0;
76   for (i = 0; i < n; ++i)
77     score += bms_[static_cast<uint8_t>(pat[i])];
78   score /= n;
79   uint8_t fch = freq[static_cast<uint8_t>(pat[lcp_])];
80 #if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2)
81   if (!have_HW_SSE2() && !have_HW_AVX2() && !have_HW_AVX512BW())
82   {
83     // if scoring is high and freq is high, then use our improved Boyer-Moore instead of memchr()
84 #if defined(__SSE2__) || defined(__x86_64__) || _M_IX86_FP == 2
85     // SSE2 is available, expect fast memchr() to use instead of BM
86     if (score > 1 && fch > 35 && (score > 3 || fch > 50) && fch + score > 52)
87       lcs_ = 0xffff;
88 #else
89     // no SSE2 available, expect slow memchr() and use BM unless score or frequency are too low
90     if (fch > 37 || (fch > 8 && score > 0))
91       lcs_ = 0xffff;
92 #endif
93   }
94 #elif defined(__SSE2__) || defined(__x86_64__) || _M_IX86_FP == 2
95   // SSE2 is available, expect fast memchr() to use instead of BM
96   if (score > 1 && fch > 35 && (score > 3 || fch > 50) && fch + score > 52)
97     lcs_ = 0xffff;
98 #endif
99 #endif
100 }
101 
102 // advance input cursor position after mismatch to align input for the next match
advance()103 bool Matcher::advance()
104 {
105   size_t loc = cur_ + 1;
106   size_t min = pat_->min_;
107   if (pat_->len_ == 0)
108   {
109     if (min == 0)
110       return false;
111     if (loc + min > end_)
112     {
113       set_current_match(loc - 1);
114       (void)peek_more();
115       loc = cur_ + 1;
116       if (loc + min > end_)
117         return false;
118     }
119     if (min >= 4)
120     {
121       const Pattern::Pred *bit = pat_->bit_;
122       Pattern::Pred state = ~0;
123       Pattern::Pred mask = (1 << (min - 1));
124       while (true)
125       {
126         const char *s = buf_ + loc;
127         const char *e = buf_ + end_;
128         while (s < e)
129         {
130           state = (state << 1) | bit[static_cast<uint8_t>(*s)];
131           if ((state & mask) == 0)
132             break;
133           ++s;
134         }
135         if (s < e)
136         {
137           s -= min - 1;
138           loc = s - buf_;
139           if (Pattern::predict_match(pat_->pmh_, s, min))
140           {
141             set_current(loc);
142             return true;
143           }
144           loc += min;
145         }
146         else
147         {
148           loc = s - buf_;
149           set_current_match(loc - min);
150           (void)peek_more();
151           loc = cur_ + min;
152           if (loc >= end_)
153             return false;
154         }
155       }
156     }
157     const Pattern::Pred *pma = pat_->pma_;
158     if (min == 3)
159     {
160       const Pattern::Pred *bit = pat_->bit_;
161       Pattern::Pred state = ~0;
162       while (true)
163       {
164         const char *s = buf_ + loc;
165         const char *e = buf_ + end_;
166         while (s < e)
167         {
168           state = (state << 1) | bit[static_cast<uint8_t>(*s)];
169           if ((state & 4) == 0)
170             break;
171           ++s;
172         }
173         if (s < e)
174         {
175           s -= 2;
176           loc = s - buf_;
177           if (s + 4 > e || Pattern::predict_match(pma, s) == 0)
178           {
179             set_current(loc);
180             return true;
181           }
182           loc += 3;
183         }
184         else
185         {
186           loc = s - buf_;
187           set_current_match(loc - 3);
188           (void)peek_more();
189           loc = cur_ + 3;
190           if (loc >= end_)
191             return false;
192         }
193       }
194     }
195     if (min == 2)
196     {
197       const Pattern::Pred *bit = pat_->bit_;
198       Pattern::Pred state = ~0;
199       while (true)
200       {
201         const char *s = buf_ + loc;
202         const char *e = buf_ + end_;
203         while (s < e)
204         {
205           state = (state << 1) | bit[static_cast<uint8_t>(*s)];
206           if ((state & 2) == 0)
207             break;
208           ++s;
209         }
210         if (s < e)
211         {
212           s -= 1;
213           loc = s - buf_;
214           if (s + 4 > e || Pattern::predict_match(pma, s) == 0)
215           {
216             set_current(loc);
217             return true;
218           }
219           loc += 2;
220         }
221         else
222         {
223           loc = s - buf_;
224           set_current_match(loc - 2);
225           (void)peek_more();
226           loc = cur_ + 2;
227           if (loc >= end_)
228             return false;
229         }
230       }
231     }
232     while (true)
233     {
234       const char *s = buf_ + loc;
235       const char *e = buf_ + end_;
236       while (s < e && (pma[static_cast<uint8_t>(*s)] & 0xc0) == 0xc0)
237         ++s;
238       if (s < e)
239       {
240         loc = s - buf_;
241         if (s + 4 > e)
242         {
243           set_current(loc);
244           return true;
245         }
246         size_t k = Pattern::predict_match(pma, s);
247         if (k == 0)
248         {
249           set_current(loc);
250           return true;
251         }
252         loc += k;
253       }
254       else
255       {
256         loc = s - buf_;
257         set_current_match(loc - 1);
258         (void)peek_more();
259         loc = cur_ + 1;
260         if (loc >= end_)
261           return false;
262       }
263     }
264   }
265   const char *pre = pat_->pre_;
266   size_t len = pat_->len_; // actually never more than 255
267   if (len == 1)
268   {
269     while (true)
270     {
271       const char *s = buf_ + loc;
272       const char *e = buf_ + end_;
273       s = static_cast<const char*>(std::memchr(s, *pre, e - s));
274       if (s != NULL)
275       {
276         loc = s - buf_;
277         set_current(loc);
278         return true;
279       }
280       loc = e - buf_;
281       set_current_match(loc - 1);
282       (void)peek_more();
283       loc = cur_ + 1;
284       if (loc + len > end_)
285         return false;
286     }
287   }
288   if (bmd_ == 0)
289     boyer_moore_init(pre, len);
290   while (true)
291   {
292     if (lcs_ < len)
293     {
294       const char *s = buf_ + loc + lcp_;
295       const char *e = buf_ + end_ + lcp_ - len + 1;
296 #if defined(HAVE_AVX512BW) && (!defined(_MSC_VER) || defined(_WIN64))
297       if (s + 64 > e && have_HW_AVX512BW())
298       {
299         if (simd_advance_avx512bw(s, e, loc, min, pre, len))
300           return true;
301       }
302       else if (s + 32 > e && have_HW_AVX2())
303       {
304         if (simd_advance_avx2(s, e, loc, min, pre, len))
305           return true;
306       }
307       else if (have_HW_SSE2())
308       {
309         // implements SSE2 string search scheme based on in http://0x80.pl/articles/simd-friendly-karp-rabin.html
310         __m128i vlcp = _mm_set1_epi8(pre[lcp_]);
311         __m128i vlcs = _mm_set1_epi8(pre[lcs_]);
312         while (s + 16 <= e)
313         {
314           __m128i vlcpm = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s));
315           __m128i vlcsm = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s + lcs_ - lcp_));
316           __m128i vlcpeq = _mm_cmpeq_epi8(vlcp, vlcpm);
317           __m128i vlcseq = _mm_cmpeq_epi8(vlcs, vlcsm);
318           uint32_t mask = _mm_movemask_epi8(_mm_and_si128(vlcpeq, vlcseq));
319           while (mask != 0)
320           {
321             uint32_t offset = ctz(mask);
322             if (std::memcmp(s - lcp_ + offset, pre, len) == 0)
323             {
324               loc = s - lcp_ + offset - buf_;
325               set_current(loc);
326               if (min == 0)
327                 return true;
328               if (min >= 4)
329               {
330                 if (loc + len + min > end_ || Pattern::predict_match(pat_->pmh_, &buf_[loc + len], min))
331                   return true;
332               }
333               else
334               {
335                 if (loc + len + 4 > end_ || Pattern::predict_match(pat_->pma_, &buf_[loc + len]) == 0)
336                   return true;
337               }
338             }
339             mask &= mask - 1;
340           }
341           s += 16;
342         }
343       }
344 #elif defined(HAVE_AVX2)
345       if (s + 32 > e && have_HW_AVX2())
346       {
347         if (simd_advance_avx2(s, e, loc, min, pre, len))
348           return true;
349       }
350       else if (have_HW_SSE2())
351       {
352         // implements SSE2 string search scheme based on in http://0x80.pl/articles/simd-friendly-karp-rabin.html
353         __m128i vlcp = _mm_set1_epi8(pre[lcp_]);
354         __m128i vlcs = _mm_set1_epi8(pre[lcs_]);
355         while (s + 16 <= e)
356         {
357           __m128i vlcpm = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s));
358           __m128i vlcsm = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s + lcs_ - lcp_));
359           __m128i vlcpeq = _mm_cmpeq_epi8(vlcp, vlcpm);
360           __m128i vlcseq = _mm_cmpeq_epi8(vlcs, vlcsm);
361           uint32_t mask = _mm_movemask_epi8(_mm_and_si128(vlcpeq, vlcseq));
362           while (mask != 0)
363           {
364             uint32_t offset = ctz(mask);
365             if (std::memcmp(s - lcp_ + offset, pre, len) == 0)
366             {
367               loc = s - lcp_ + offset - buf_;
368               set_current(loc);
369               if (min == 0)
370                 return true;
371               if (min >= 4)
372               {
373                 if (loc + len + min > end_ || Pattern::predict_match(pat_->pmh_, &buf_[loc + len], min))
374                   return true;
375               }
376               else
377               {
378                 if (loc + len + 4 > end_ || Pattern::predict_match(pat_->pma_, &buf_[loc + len]) == 0)
379                   return true;
380               }
381             }
382             mask &= mask - 1;
383           }
384           s += 16;
385         }
386       }
387 #elif defined(HAVE_SSE2)
388       if (have_HW_SSE2())
389       {
390         // implements SSE2 string search scheme based on in http://0x80.pl/articles/simd-friendly-karp-rabin.html
391         __m128i vlcp = _mm_set1_epi8(pre[lcp_]);
392         __m128i vlcs = _mm_set1_epi8(pre[lcs_]);
393         while (s + 16 <= e)
394         {
395           __m128i vlcpm = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s));
396           __m128i vlcsm = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s + lcs_ - lcp_));
397           __m128i vlcpeq = _mm_cmpeq_epi8(vlcp, vlcpm);
398           __m128i vlcseq = _mm_cmpeq_epi8(vlcs, vlcsm);
399           uint32_t mask = _mm_movemask_epi8(_mm_and_si128(vlcpeq, vlcseq));
400           while (mask != 0)
401           {
402             uint32_t offset = ctz(mask);
403             if (std::memcmp(s - lcp_ + offset, pre, len) == 0)
404             {
405               loc = s - lcp_ + offset - buf_;
406               set_current(loc);
407               if (min == 0)
408                 return true;
409               if (min >= 4)
410               {
411                 if (loc + len + min > end_ || Pattern::predict_match(pat_->pmh_, &buf_[loc + len], min))
412                   return true;
413               }
414               else
415               {
416                 if (loc + len + 4 > end_ || Pattern::predict_match(pat_->pma_, &buf_[loc + len]) == 0)
417                   return true;
418               }
419             }
420             mask &= mask - 1;
421           }
422           s += 16;
423         }
424       }
425 #elif defined(HAVE_NEON)
426       // implements NEON/AArch64 string search scheme based on in http://0x80.pl/articles/simd-friendly-karp-rabin.html but 64 bit optimized
427       uint8x16_t vlcp = vdupq_n_u8(pre[lcp_]);
428       uint8x16_t vlcs = vdupq_n_u8(pre[lcs_]);
429       while (s + 16 <= e)
430       {
431         uint8x16_t vlcpm = vld1q_u8(reinterpret_cast<const uint8_t*>(s));
432         uint8x16_t vlcsm = vld1q_u8(reinterpret_cast<const uint8_t*>(s) + lcs_ - lcp_);
433         uint8x16_t vlcpeq = vceqq_u8(vlcp, vlcpm);
434         uint8x16_t vlcseq = vceqq_u8(vlcs, vlcsm);
435         uint8x16_t vmask8 = vandq_u8(vlcpeq, vlcseq);
436         uint64x2_t vmask64 = vreinterpretq_u64_u8(vmask8);
437         uint64_t mask = vgetq_lane_u64(vmask64, 0);
438         if (mask != 0)
439         {
440           for (int i = 0; i < 8; ++i)
441           {
442             if ((mask & 0xff) && std::memcmp(s - lcp_ + i, pre, len) == 0)
443             {
444               loc = s - lcp_ + i - buf_;
445               set_current(loc);
446               if (min == 0)
447                 return true;
448               if (min >= 4)
449               {
450                 if (loc + len + min > end_ || Pattern::predict_match(pat_->pmh_, &buf_[loc + len], min))
451                   return true;
452               }
453               else
454               {
455                 if (loc + len + 4 > end_ || Pattern::predict_match(pat_->pma_, &buf_[loc + len]) == 0)
456                   return true;
457               }
458             }
459             mask >>= 8;
460           }
461         }
462         mask = vgetq_lane_u64(vmask64, 1);
463         if (mask != 0)
464         {
465           for (int i = 0; i < 8; ++i)
466           {
467             if ((mask & 0xff) && std::memcmp(s - lcp_ + i + 8, pre, len) == 0)
468             {
469               loc = s - lcp_ + i + 8 - buf_;
470               set_current(loc);
471               if (min == 0)
472                 return true;
473               if (min >= 4)
474               {
475                 if (loc + len + min > end_ || Pattern::predict_match(pat_->pmh_, &buf_[loc + len], min))
476                   return true;
477               }
478               else
479               {
480                 if (loc + len + 4 > end_ || Pattern::predict_match(pat_->pma_, &buf_[loc + len]) == 0)
481                   return true;
482               }
483             }
484             mask >>= 8;
485           }
486         }
487         s += 16;
488       }
489 #endif
490       while (s < e)
491       {
492         do
493           s = static_cast<const char*>(std::memchr(s, pre[lcp_], e - s));
494         while (s != NULL && s[lcs_ - lcp_] != pre[lcs_] && ++s < e);
495         if (s == NULL || s >= e)
496         {
497           s = e;
498           break;
499         }
500         if (len <= 2 || memcmp(s - lcp_, pre, len) == 0)
501         {
502           loc = s - lcp_ - buf_;
503           set_current(loc);
504           if (min == 0)
505             return true;
506           if (min >= 4)
507           {
508             if (loc + len + min > end_ || Pattern::predict_match(pat_->pmh_, &buf_[loc + len], min))
509               return true;
510           }
511           else
512           {
513             if (loc + len + 4 > end_ || Pattern::predict_match(pat_->pma_, &buf_[loc + len]) == 0)
514               return true;
515           }
516         }
517         ++s;
518       }
519       loc = s - lcp_ - buf_;
520       set_current_match(loc - 1);
521       (void)peek_more();
522       loc = cur_ + 1;
523       if (loc + len > end_)
524         return false;
525     }
526     else
527     {
528       // implements our improved Boyer-Moore scheme
529       const char *s = buf_ + loc + len - 1;
530       const char *e = buf_ + end_;
531       const char *t = pre + len - 1;
532       while (s < e)
533       {
534         size_t k = 0;
535         do
536           s += k = bms_[static_cast<uint8_t>(*s)];
537         while (k > 0 ? s < e : s[lcp_ - len + 1] != pre[lcp_] && (s += bmd_) < e);
538         if (s >= e)
539           break;
540         const char *p = t - 1;
541         const char *q = s - 1;
542         while (p >= pre && *p == *q)
543         {
544           --p;
545           --q;
546         }
547         if (p < pre)
548         {
549           loc = q - buf_ + 1;
550           set_current(loc);
551           if (min == 0)
552             return true;
553           if (min >= 4)
554           {
555             if (loc + len + min > end_ || Pattern::predict_match(pat_->pmh_, &buf_[loc + len], min))
556               return true;
557           }
558           else
559           {
560             if (loc + len + 4 > end_ || Pattern::predict_match(pat_->pma_, &buf_[loc + len]) == 0)
561               return true;
562           }
563         }
564         if (pre + bmd_ >= p)
565         {
566           s += bmd_;
567         }
568         else
569         {
570           size_t k = bms_[static_cast<uint8_t>(*q)];
571           if (p + k > t + bmd_)
572             s += k - (t - p);
573           else
574             s += bmd_;
575         }
576       }
577       s -= len - 1;
578       loc = s - buf_;
579       set_current_match(loc - 1);
580       (void)peek_more();
581       loc = cur_ + 1;
582       if (loc + len > end_)
583         return false;
584     }
585   }
586 }
587 
588 } // namespace reflex
589