1 /******************************************************************************\
2 * Copyright (c) 2016, Robert van Engelen, Genivia Inc. All rights reserved.    *
3 *                                                                              *
4 * Redistribution and use in source and binary forms, with or without           *
5 * modification, are permitted provided that the following conditions are met:  *
6 *                                                                              *
7 *   (1) Redistributions of source code must retain the above copyright notice, *
8 *       this list of conditions and the following disclaimer.                  *
9 *                                                                              *
10 *   (2) Redistributions in binary form must reproduce the above copyright      *
11 *       notice, this list of conditions and the following disclaimer in the    *
12 *       documentation and/or other materials provided with the distribution.   *
13 *                                                                              *
14 *   (3) The name of the author may not be used to endorse or promote products  *
15 *       derived from this software without specific prior written permission.  *
16 *                                                                              *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED *
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF         *
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO   *
20 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,       *
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;  *
23 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,     *
24 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR      *
25 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF       *
26 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                   *
27 \******************************************************************************/
28 
29 /**
30 @file      simd_avx512bw.cpp
31 @brief     RE/flex SIMD intrinsics compiled with -mavx512bw
32 @author    Robert van Engelen - engelen@genivia.com
33 @copyright (c) 2016-2021, Robert van Engelen, Genivia Inc. All rights reserved.
34 @copyright (c) BSD-3 License - see LICENSE.txt
35 */
36 
37 #include <reflex/matcher.h>
38 
39 namespace reflex {
40 
// Partially count newlines in the string starting at b up to and including
// position e.
//
// Only whole 64-byte chunks are counted, and only when this file is compiled
// with AVX512BW support; otherwise nothing is counted.  On return b is advanced
// past the counted chunks, so the caller must still count the newlines in the
// remaining part b..e (fewer than 64 bytes when AVX512BW is enabled, the whole
// input otherwise).
//
// @param b  in: start of the string; out: start of the uncounted remainder
// @param e  last position (inclusive) of the string to scan
// @return   number of newlines found in the chunks that were counted
size_t simd_nlcount_avx512bw(const char*& b, const char *e)
{
  const char *s = b;
  size_t n = 0;
#if defined(HAVE_AVX512BW) && (!defined(_MSC_VER) || defined(_WIN64))
  const __m512i vlcn = _mm512_set1_epi8('\n');
  // e is inclusive: a 64-byte load at s reads s..s+63, which fits when
  // e - s >= 63; comparing the distance avoids forming a pointer s + 63 that
  // may lie beyond the scanned range
  while (e - s >= 63)
  {
    const __m512i vlcm = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(s));
    // one mask bit per byte that equals '\n'
    const uint64_t mask = _mm512_cmpeq_epi8_mask(vlcm, vlcn);
    n += popcountl(mask);
    s += 64;
  }
#else
  (void)e;
#endif
  b = s;
  return n;
}
61 
62 // string search scheme based on in http://0x80.pl/articles/simd-friendly-karp-rabin.html
simd_advance_avx512bw(const char * & b,const char * e,size_t & loc,size_t min,const char * pre,size_t len)63 bool Matcher::simd_advance_avx512bw(const char*& b, const char *e, size_t &loc, size_t min, const char *pre, size_t len)
64 {
65   const char *s = b;
66 #if defined(HAVE_AVX512BW) && (!defined(_MSC_VER) || defined(_WIN64))
67   __m512i vlcp = _mm512_set1_epi8(pre[lcp_]);
68   __m512i vlcs = _mm512_set1_epi8(pre[lcs_]);
69   while (s + 64 <= e)
70   {
71     __m512i vlcpm = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(s));
72     __m512i vlcsm = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(s + lcs_ - lcp_));
73     uint64_t mask = _mm512_cmpeq_epi8_mask(vlcp, vlcpm) & _mm512_cmpeq_epi8_mask(vlcs, vlcsm);
74     while (mask != 0)
75     {
76       uint32_t offset = ctzl(mask);
77       if (std::memcmp(s - lcp_ + offset, pre, len) == 0)
78       {
79         loc = s - lcp_ + offset - buf_;
80         set_current(loc);
81         if (min == 0)
82           return true;
83         if (min >= 4)
84         {
85           if (loc + len + min > end_ || Pattern::predict_match(pat_->pmh_, &buf_[loc + len], min))
86             return true;
87         }
88         else
89         {
90           if (loc + len + 4 > end_ || Pattern::predict_match(pat_->pma_, &buf_[loc + len]) == 0)
91             return true;
92         }
93       }
94       mask &= mask - 1;
95     }
96     s += 64;
97   }
98 #else
99   (void)e, (void)loc, (void)min, (void)pre, (void)len;
100 #endif
101   b = s;
102   return false;
103 }
104 
105 } // namespace reflex
106