1 /******************************************************************************\
2 * Copyright (c) 2016, Robert van Engelen, Genivia Inc. All rights reserved.    *
3 *                                                                              *
4 * Redistribution and use in source and binary forms, with or without           *
5 * modification, are permitted provided that the following conditions are met:  *
6 *                                                                              *
7 *   (1) Redistributions of source code must retain the above copyright notice, *
8 *       this list of conditions and the following disclaimer.                  *
9 *                                                                              *
10 *   (2) Redistributions in binary form must reproduce the above copyright      *
11 *       notice, this list of conditions and the following disclaimer in the    *
12 *       documentation and/or other materials provided with the distribution.   *
13 *                                                                              *
14 *   (3) The name of the author may not be used to endorse or promote products  *
15 *       derived from this software without specific prior written permission.  *
16 *                                                                              *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED *
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF         *
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO   *
20 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,       *
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;  *
23 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,     *
24 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR      *
25 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF       *
26 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                   *
27 \******************************************************************************/
28 
29 /**
30 @file      simd.h
31 @brief     RE/flex SIMD intrinsics
32 @author    Robert van Engelen - engelen@genivia.com
33 @copyright (c) 2016-2020, Robert van Engelen, Genivia Inc. All rights reserved.
34 @copyright (c) BSD-3 License - see LICENSE.txt
35 */
36 
37 #ifndef SIMD_H
38 #define SIMD_H
39 
40 #if defined(HAVE_AVX512BW)
41 # include <immintrin.h>
42 #elif defined(HAVE_AVX2)
43 # include <immintrin.h>
44 #elif defined(HAVE_SSE2)
45 # include <emmintrin.h>
46 #elif defined(HAVE_NEON)
47 # include <arm_neon.h>
48 #endif
49 
50 #if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2)
51 
52 #ifdef _MSC_VER
53 # include <intrin.h>
54 #endif
55 
56 #ifdef _MSC_VER
57 # define cpuidex __cpuidex
58 #else
59 # include <cpuid.h>
60 # define cpuidex(CPUInfo, id, subid) __cpuid_count(id, subid, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3])
61 #endif
62 
63 namespace reflex {
64 
65 // HW id: hardware feature bits tested by the have_HW_*() checks below
66 extern uint64_t HW;
67 
68 // do we have AVX512BW?
have_HW_AVX512BW()69 inline bool have_HW_AVX512BW()
70 {
71   return HW & (1ULL << 62);
72 }
73 
74 // do we have AVX2?
have_HW_AVX2()75 inline bool have_HW_AVX2()
76 {
77   return HW & (1ULL << 37);
78 }
79 
80 // do we have SSE2?
have_HW_SSE2()81 inline bool have_HW_SSE2()
82 {
83   return HW & (1ULL << 26);
84 }
85 
86 #ifdef _MSC_VER
87 #pragma intrinsic(_BitScanForward)
ctz(uint32_t x)88 inline uint32_t ctz(uint32_t x)
89 {
90   unsigned long r;
91   _BitScanForward(&r, x);
92   return r;
93 }
popcount(uint32_t x)94 inline uint32_t popcount(uint32_t x)
95 {
96   return __popcnt(x);
97 }
98 #ifdef _WIN64
99 #pragma intrinsic(_BitScanForward64)
ctzl(uint64_t x)100 inline uint32_t ctzl(uint64_t x)
101 {
102   unsigned long r;
103   _BitScanForward64(&r, x);
104   return r;
105 }
popcountl(uint64_t x)106 inline uint32_t popcountl(uint64_t x)
107 {
108   return static_cast<uint32_t>(__popcnt64(x));
109 }
110 #endif
111 #else
// Count trailing zero bits of a 32-bit value (non-MSVC build), using the compiler builtin.
// As with the builtin itself, the result is undefined when x is 0.
inline uint32_t ctz(uint32_t x)
{
  const int trailing_zeros = __builtin_ctz(x);
  return static_cast<uint32_t>(trailing_zeros);
}
// Count trailing zero bits of a 64-bit value (non-MSVC build).
// Uses the `long long` builtin on purpose: `unsigned long long` is at least 64 bits on every
// target, whereas `unsigned long` is only 32 bits on ILP32 and LLP64 targets (e.g. MinGW-w64),
// where __builtin_ctzl() would silently discard the upper half of x.
// As with the builtin itself, the result is undefined when x is 0.
inline uint32_t ctzl(uint64_t x)
{
  return static_cast<uint32_t>(__builtin_ctzll(x));
}
// Count the number of set bits in a 32-bit value (non-MSVC build), using the compiler builtin.
inline uint32_t popcount(uint32_t x)
{
  const int set_bits = __builtin_popcount(x);
  return static_cast<uint32_t>(set_bits);
}
// Count the number of set bits in a 64-bit value (non-MSVC build).
// Uses the `long long` builtin on purpose: `unsigned long long` is at least 64 bits on every
// target, whereas `unsigned long` is only 32 bits on ILP32 and LLP64 targets (e.g. MinGW-w64),
// where __builtin_popcountl() would silently ignore the upper half of x.
inline uint32_t popcountl(uint64_t x)
{
  return static_cast<uint32_t>(__builtin_popcountll(x));
}
128 #endif
129 
130 // Partially count newlines in the string starting at b, up to and including position e; b is updated to point close to e, at the start of the part that was not counted
131 extern size_t simd_nlcount_avx2(const char*& b, const char *e);
132 extern size_t simd_nlcount_avx512bw(const char*& b, const char *e);
133 
134 } // namespace reflex
135 
136 #endif
137 
138 #endif
139 
140 
141