1 /******************************************************************************\
2 * Copyright (c) 2016, Robert van Engelen, Genivia Inc. All rights reserved. *
3 * *
4 * Redistribution and use in source and binary forms, with or without *
5 * modification, are permitted provided that the following conditions are met: *
6 * *
7 * (1) Redistributions of source code must retain the above copyright notice, *
8 * this list of conditions and the following disclaimer. *
9 * *
10 * (2) Redistributions in binary form must reproduce the above copyright *
11 * notice, this list of conditions and the following disclaimer in the *
12 * documentation and/or other materials provided with the distribution. *
13 * *
14 * (3) The name of the author may not be used to endorse or promote products *
15 * derived from this software without specific prior written permission. *
16 * *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED *
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF *
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO *
20 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, *
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; *
23 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, *
24 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR *
25 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF *
26 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
27 \******************************************************************************/
28
29 /**
30 @file simd_avx512bw.cpp
31 @brief RE/flex SIMD intrinsics compiled with -mavx512bw
32 @author Robert van Engelen - engelen@genivia.com
33 @copyright (c) 2016-2021, Robert van Engelen, Genivia Inc. All rights reserved.
34 @copyright (c) BSD-3 License - see LICENSE.txt
35 */
36
37 #include <reflex/matcher.h>
38
39 namespace reflex {
40
/// Partially count newlines in the text from b up to and including position e, 64 bytes at a time.
///
/// Only whole 64-byte chunks are examined. On return b points to the first byte that was not
/// examined, so the newlines in the remaining part from b up to e are left uncounted. When the
/// AVX512BW code path is not compiled in, nothing is counted and b is left unchanged.
///
/// @param b  in: start of the text; out: start of the part that was not counted
/// @param e  last position (inclusive) of the text to count
/// @return   number of '\n' bytes found in the chunks that were examined
size_t simd_nlcount_avx512bw(const char*& b, const char *e)
{
  const char *s = b;
  size_t n = 0;
#if defined(HAVE_AVX512BW) && (!defined(_MSC_VER) || defined(_WIN64))
  const __m512i vlcn = _mm512_set1_epi8('\n');
  // a chunk covers s[0..63], so it fits when e lies at least 63 positions past s; testing the
  // distance avoids forming the pointer s + 63 beyond the end of a short text
  while (e - s >= 63)
  {
    const __m512i vlcm = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(s));
    // one mask bit per byte of the chunk that equals '\n'
    const uint64_t mask = _mm512_cmpeq_epi8_mask(vlcm, vlcn);
    n += popcountl(mask);
    s += 64;
  }
#else
  (void)e;
#endif
  b = s;
  return n;
}
61
62 // string search scheme based on in http://0x80.pl/articles/simd-friendly-karp-rabin.html
simd_advance_avx512bw(const char * & b,const char * e,size_t & loc,size_t min,const char * pre,size_t len)63 bool Matcher::simd_advance_avx512bw(const char*& b, const char *e, size_t &loc, size_t min, const char *pre, size_t len)
64 {
65 const char *s = b;
66 #if defined(HAVE_AVX512BW) && (!defined(_MSC_VER) || defined(_WIN64))
67 __m512i vlcp = _mm512_set1_epi8(pre[lcp_]);
68 __m512i vlcs = _mm512_set1_epi8(pre[lcs_]);
69 while (s + 64 <= e)
70 {
71 __m512i vlcpm = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(s));
72 __m512i vlcsm = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(s + lcs_ - lcp_));
73 uint64_t mask = _mm512_cmpeq_epi8_mask(vlcp, vlcpm) & _mm512_cmpeq_epi8_mask(vlcs, vlcsm);
74 while (mask != 0)
75 {
76 uint32_t offset = ctzl(mask);
77 if (std::memcmp(s - lcp_ + offset, pre, len) == 0)
78 {
79 loc = s - lcp_ + offset - buf_;
80 set_current(loc);
81 if (min == 0)
82 return true;
83 if (min >= 4)
84 {
85 if (loc + len + min > end_ || Pattern::predict_match(pat_->pmh_, &buf_[loc + len], min))
86 return true;
87 }
88 else
89 {
90 if (loc + len + 4 > end_ || Pattern::predict_match(pat_->pma_, &buf_[loc + len]) == 0)
91 return true;
92 }
93 }
94 mask &= mask - 1;
95 }
96 s += 64;
97 }
98 #else
99 (void)e, (void)loc, (void)min, (void)pre, (void)len;
100 #endif
101 b = s;
102 return false;
103 }
104
105 } // namespace reflex
106