1 /******************************************************************************\
2 * Copyright (c) 2016, Robert van Engelen, Genivia Inc. All rights reserved. *
3 * *
4 * Redistribution and use in source and binary forms, with or without *
5 * modification, are permitted provided that the following conditions are met: *
6 * *
7 * (1) Redistributions of source code must retain the above copyright notice, *
8 * this list of conditions and the following disclaimer. *
9 * *
10 * (2) Redistributions in binary form must reproduce the above copyright *
11 * notice, this list of conditions and the following disclaimer in the *
12 * documentation and/or other materials provided with the distribution. *
13 * *
14 * (3) The name of the author may not be used to endorse or promote products *
15 * derived from this software without specific prior written permission. *
16 * *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED *
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF *
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO *
20 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, *
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; *
23 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, *
24 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR *
25 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF *
26 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
27 \******************************************************************************/
28
29 /**
30 @file simd.h
31 @brief RE/flex SIMD intrinsics
32 @author Robert van Engelen - engelen@genivia.com
33 @copyright (c) 2016-2020, Robert van Engelen, Genivia Inc. All rights reserved.
34 @copyright (c) BSD-3 License - see LICENSE.txt
35 */
36
37 #ifndef SIMD_H
38 #define SIMD_H
39
40 #if defined(HAVE_AVX512BW)
41 # include <immintrin.h>
42 #elif defined(HAVE_AVX2)
43 # include <immintrin.h>
44 #elif defined(HAVE_SSE2)
45 # include <emmintrin.h>
46 #elif defined(HAVE_NEON)
47 # include <arm_neon.h>
48 #endif
49
50 #if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2)
51
52 #ifdef _MSC_VER
53 # include <intrin.h>
54 #endif
55
// cpuidex(CPUInfo, id, subid): one spelling for the compiler-specific CPUID helper
#ifdef _MSC_VER
// MSVC: cpuidex is simply an alias for the __cpuidex intrinsic
# define cpuidex __cpuidex
#else
// other compilers: forward to __cpuid_count from <cpuid.h>, passing id and subid first
// and the four elements CPUInfo[0..3] as its four remaining arguments
# include <cpuid.h>
# define cpuidex(CPUInfo, id, subid) __cpuid_count(id, subid, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3])
#endif
62
63 namespace reflex {
64
// HW id: bit mask tested by the have_HW_*() functions below
66 extern uint64_t HW;
67
68 // do we have AVX512BW?
have_HW_AVX512BW()69 inline bool have_HW_AVX512BW()
70 {
71 return HW & (1ULL << 62);
72 }
73
74 // do we have AVX2?
have_HW_AVX2()75 inline bool have_HW_AVX2()
76 {
77 return HW & (1ULL << 37);
78 }
79
80 // do we have SSE2?
have_HW_SSE2()81 inline bool have_HW_SSE2()
82 {
83 return HW & (1ULL << 26);
84 }
85
86 #ifdef _MSC_VER
87 #pragma intrinsic(_BitScanForward)
ctz(uint32_t x)88 inline uint32_t ctz(uint32_t x)
89 {
90 unsigned long r;
91 _BitScanForward(&r, x);
92 return r;
93 }
popcount(uint32_t x)94 inline uint32_t popcount(uint32_t x)
95 {
96 return __popcnt(x);
97 }
98 #ifdef _WIN64
99 #pragma intrinsic(_BitScanForward64)
ctzl(uint64_t x)100 inline uint32_t ctzl(uint64_t x)
101 {
102 unsigned long r;
103 _BitScanForward64(&r, x);
104 return r;
105 }
popcountl(uint64_t x)106 inline uint32_t popcountl(uint64_t x)
107 {
108 return static_cast<uint32_t>(__popcnt64(x));
109 }
110 #endif
111 #else
// Count trailing zero bits of the 32-bit value x, i.e. the index of its lowest set bit.
// Forwards to __builtin_ctz, whose result is undefined when x is zero.
inline uint32_t ctz(uint32_t x)
{
  const int index = __builtin_ctz(x);
  return static_cast<uint32_t>(index);
}
// Count trailing zero bits of the 64-bit value x, i.e. the index of its lowest set bit.
// Uses the `unsigned long long` builtin: `unsigned long` is only 32 bits wide on
// ILP32 and LLP64 targets, where __builtin_ctzl would drop the upper half of x.
// The result is undefined when x is zero (builtin contract).
inline uint32_t ctzl(uint64_t x)
{
  return static_cast<uint32_t>(__builtin_ctzll(x));
}
// Count the number of set bits in the 32-bit value x (via __builtin_popcount).
inline uint32_t popcount(uint32_t x)
{
  const int bits = __builtin_popcount(x);
  return static_cast<uint32_t>(bits);
}
// Count the number of set bits in the 64-bit value x.
// Uses the `unsigned long long` builtin: `unsigned long` is only 32 bits wide on
// ILP32 and LLP64 targets, where __builtin_popcountl would ignore the upper half of x.
inline uint32_t popcountl(uint64_t x)
{
  return static_cast<uint32_t>(__builtin_popcountll(x));
}
128 #endif
129
// Partially count newlines in string b up to and including position e in b; updates b to a position close to e where the uncounted part begins
131 extern size_t simd_nlcount_avx2(const char*& b, const char *e);
132 extern size_t simd_nlcount_avx512bw(const char*& b, const char *e);
133
134 } // namespace reflex
135
136 #endif
137
138 #endif
139
140
141