1 // Internal macros for the simd implementation -*- C++ -*-
2 
3 // Copyright (C) 2020-2021 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
26 #define _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
27 
28 #if __cplusplus >= 201703L
29 
30 #include <cstddef>
31 #include <cstdint>
32 
33 
// Opens, outermost first: namespace std (with _GLIBCXX_VISIBILITY(default)),
// whatever _GLIBCXX_BEGIN_NAMESPACE_VERSION expands to, namespace experimental
// and the inline namespace parallelism_v2. Every use has to be matched by
// _GLIBCXX_SIMD_END_NAMESPACE.
#define _GLIBCXX_SIMD_BEGIN_NAMESPACE                                          \
  namespace std _GLIBCXX_VISIBILITY(default) {                                 \
  _GLIBCXX_BEGIN_NAMESPACE_VERSION                                             \
  namespace experimental {                                                     \
  inline namespace parallelism_v2 {

// Closes, innermost first, everything _GLIBCXX_SIMD_BEGIN_NAMESPACE opened.
#define _GLIBCXX_SIMD_END_NAMESPACE                                            \
  } /* inline namespace parallelism_v2 */                                      \
  } /* namespace experimental */                                               \
  _GLIBCXX_END_NAMESPACE_VERSION                                               \
  } /* namespace std */
45 
// ISA extension detection. The following defines all the
// _GLIBCXX_SIMD_HAVE_XXX macros.
// ARM{{{
// _GLIBCXX_SIMD_HAVE_NEON: 1 whenever __ARM_NEON is defined, otherwise 0.
#if defined(__ARM_NEON)
#  define _GLIBCXX_SIMD_HAVE_NEON 1
#else
#  define _GLIBCXX_SIMD_HAVE_NEON 0
#endif

// _GLIBCXX_SIMD_HAVE_NEON_A32: additionally requires __aarch64__ or an
// __ARM_ARCH value of at least 8.
#if defined(__ARM_NEON) && (defined(__aarch64__) || __ARM_ARCH >= 8)
#  define _GLIBCXX_SIMD_HAVE_NEON_A32 1
#else
#  define _GLIBCXX_SIMD_HAVE_NEON_A32 0
#endif

// _GLIBCXX_SIMD_HAVE_NEON_A64: requires both __ARM_NEON and __aarch64__.
#if defined(__aarch64__) && defined(__ARM_NEON)
#  define _GLIBCXX_SIMD_HAVE_NEON_A64 1
#else
#  define _GLIBCXX_SIMD_HAVE_NEON_A64 0
#endif
63 //}}}
64 // x86{{{
// Each _GLIBCXX_SIMD_HAVE_<ISA> macro below is always defined, as 1 when the
// corresponding compiler feature macro is defined and as 0 otherwise.

// MMX
#if defined(__MMX__)
#  define _GLIBCXX_SIMD_HAVE_MMX 1
#else
#  define _GLIBCXX_SIMD_HAVE_MMX 0
#endif

// SSE and SSE2 are additionally reported as available whenever __x86_64__ is
// defined, independent of __SSE__ / __SSE2__.
#if defined(__x86_64__) || defined(__SSE__)
#  define _GLIBCXX_SIMD_HAVE_SSE 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE 0
#endif
#if defined(__x86_64__) || defined(__SSE2__)
#  define _GLIBCXX_SIMD_HAVE_SSE2 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE2 0
#endif

// SSE3
#if defined(__SSE3__)
#  define _GLIBCXX_SIMD_HAVE_SSE3 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE3 0
#endif

// SSSE3
#if defined(__SSSE3__)
#  define _GLIBCXX_SIMD_HAVE_SSSE3 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSSE3 0
#endif

// SSE4.1
#if defined(__SSE4_1__)
#  define _GLIBCXX_SIMD_HAVE_SSE4_1 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE4_1 0
#endif

// SSE4.2
#if defined(__SSE4_2__)
#  define _GLIBCXX_SIMD_HAVE_SSE4_2 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE4_2 0
#endif
// Further x86 extensions. Every macro is always defined: 1 when the named
// compiler feature macro is defined, 0 otherwise.

// XOP
#if defined(__XOP__)
#  define _GLIBCXX_SIMD_HAVE_XOP 1
#else
#  define _GLIBCXX_SIMD_HAVE_XOP 0
#endif

// AVX
#if defined(__AVX__)
#  define _GLIBCXX_SIMD_HAVE_AVX 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX 0
#endif

// AVX2
#if defined(__AVX2__)
#  define _GLIBCXX_SIMD_HAVE_AVX2 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX2 0
#endif

// BMI1 (note: the compiler macro is spelled __BMI__)
#if defined(__BMI__)
#  define _GLIBCXX_SIMD_HAVE_BMI1 1
#else
#  define _GLIBCXX_SIMD_HAVE_BMI1 0
#endif

// BMI2
#if defined(__BMI2__)
#  define _GLIBCXX_SIMD_HAVE_BMI2 1
#else
#  define _GLIBCXX_SIMD_HAVE_BMI2 0
#endif

// LZCNT
#if defined(__LZCNT__)
#  define _GLIBCXX_SIMD_HAVE_LZCNT 1
#else
#  define _GLIBCXX_SIMD_HAVE_LZCNT 0
#endif

// SSE4A
#if defined(__SSE4A__)
#  define _GLIBCXX_SIMD_HAVE_SSE4A 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE4A 0
#endif

// FMA
#if defined(__FMA__)
#  define _GLIBCXX_SIMD_HAVE_FMA 1
#else
#  define _GLIBCXX_SIMD_HAVE_FMA 0
#endif

// FMA4
#if defined(__FMA4__)
#  define _GLIBCXX_SIMD_HAVE_FMA4 1
#else
#  define _GLIBCXX_SIMD_HAVE_FMA4 0
#endif

// F16C
#if defined(__F16C__)
#  define _GLIBCXX_SIMD_HAVE_F16C 1
#else
#  define _GLIBCXX_SIMD_HAVE_F16C 0
#endif

// POPCNT
#if defined(__POPCNT__)
#  define _GLIBCXX_SIMD_HAVE_POPCNT 1
#else
#  define _GLIBCXX_SIMD_HAVE_POPCNT 0
#endif
// AVX-512 subsets. Every macro is always defined: 1 when the named compiler
// feature macro is defined, 0 otherwise.

// AVX512F
#if defined(__AVX512F__)
#  define _GLIBCXX_SIMD_HAVE_AVX512F 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX512F 0
#endif

// AVX512DQ
#if defined(__AVX512DQ__)
#  define _GLIBCXX_SIMD_HAVE_AVX512DQ 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX512DQ 0
#endif

// AVX512VL
#if defined(__AVX512VL__)
#  define _GLIBCXX_SIMD_HAVE_AVX512VL 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX512VL 0
#endif

// AVX512BW
#if defined(__AVX512BW__)
#  define _GLIBCXX_SIMD_HAVE_AVX512BW 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX512BW 0
#endif
175 
// ABI availability, derived from the _GLIBCXX_SIMD_HAVE_<ISA> macros. For each
// register width there is a basic flag and a "FULL" flag; both are always
// defined as either 0 or 1.

// SSE ABI: basic with SSE, full with SSE2.
#if _GLIBCXX_SIMD_HAVE_SSE
#  define _GLIBCXX_SIMD_HAVE_SSE_ABI 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_SSE2
#  define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 1
#else
#  define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 0
#endif

// AVX ABI: basic with AVX, full with AVX2.
#if _GLIBCXX_SIMD_HAVE_AVX
#  define _GLIBCXX_SIMD_HAVE_AVX_ABI 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_AVX2
#  define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 1
#else
#  define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 0
#endif

// AVX-512 ABI: basic with AVX512F, full with AVX512BW.
#if _GLIBCXX_SIMD_HAVE_AVX512F
#  define _GLIBCXX_SIMD_HAVE_AVX512_ABI 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX512_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_AVX512BW
#  define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 1
#else
#  define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 0
#endif
208 
209 #if defined __x86_64__ && !_GLIBCXX_SIMD_HAVE_SSE2
210 #error "Use of SSE2 is required on AMD64"
211 #endif
212 //}}}
213 
// Branch hints: both forward __x to __builtin_expect, with an expected value
// of 1 (likely) or 0 (unlikely).
#define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)
#define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)

// Inlining control for function declarations.
//   _GLIBCXX_SIMD_ALWAYS_INLINE: always_inline attribute plus `inline`.
//   _GLIBCXX_SIMD_INTRINSIC:     as above, additionally marked artificial.
//   _GLIBCXX_SIMD_NEVER_INLINE:  noinline attribute.
#define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline
#define _GLIBCXX_SIMD_INTRINSIC                                                \
  [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline
#define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]]

// _GLIBCXX_SIMD_NORMAL_MATH: an optimize attribute requesting
// "finite-math-only,no-signed-zeros"; expands to nothing when compiling with
// Clang.
#ifndef __clang__
#define _GLIBCXX_SIMD_NORMAL_MATH                                              \
  [[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]]
#else
#define _GLIBCXX_SIMD_NORMAL_MATH
#endif
226 
// _GLIBCXX_SIMD_CONSTEXPR and _GLIBCXX_SIMD_USE_CONSTEXPR_API: when
// __STRICT_ANSI__ is defined and non-zero they expand to nothing and `const`
// respectively; otherwise both expand to `constexpr`.
#if defined(__STRICT_ANSI__) && __STRICT_ANSI__
#  define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
#  define _GLIBCXX_SIMD_CONSTEXPR
#else
#  define _GLIBCXX_SIMD_USE_CONSTEXPR_API constexpr
#  define _GLIBCXX_SIMD_CONSTEXPR constexpr
#endif

// _GLIBCXX_SIMD_USE_CONSTEXPR: `constexpr`, except under Clang where it is
// `const`.
#ifndef __clang__
#  define _GLIBCXX_SIMD_USE_CONSTEXPR constexpr
#else
#  define _GLIBCXX_SIMD_USE_CONSTEXPR const
#endif
240 
// X-macro helpers. Each _GLIBCXX_SIMD_LIST_* macro invokes __apply once per
// operator token, in the order written below.
//   BINARY:      |  &  ^
//   SHIFTS:      << >>
//   ARITHMETICS: +  -  *  /  %
#define _GLIBCXX_SIMD_LIST_BINARY(__apply)                                     \
  __apply(|)                                                                   \
  __apply(&)                                                                   \
  __apply(^)
#define _GLIBCXX_SIMD_LIST_SHIFTS(__apply)                                     \
  __apply(<<)                                                                  \
  __apply(>>)
#define _GLIBCXX_SIMD_LIST_ARITHMETICS(__apply)                                \
  __apply(+)                                                                   \
  __apply(-)                                                                   \
  __apply(*)                                                                   \
  __apply(/)                                                                   \
  __apply(%)

// The _GLIBCXX_SIMD_ALL_* variants append `static_assert(true)` after the
// list, so an invocation has to be terminated with a semicolon.
#define _GLIBCXX_SIMD_ALL_BINARY(__apply)                                      \
  _GLIBCXX_SIMD_LIST_BINARY(__apply) static_assert(true)
#define _GLIBCXX_SIMD_ALL_SHIFTS(__apply)                                      \
  _GLIBCXX_SIMD_LIST_SHIFTS(__apply) static_assert(true)
#define _GLIBCXX_SIMD_ALL_ARITHMETICS(__apply)                                 \
  _GLIBCXX_SIMD_LIST_ARITHMETICS(__apply) static_assert(true)
252 
// Opt-out switch: when _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined, both
// _GLIBCXX_SIMD_INTRINSIC and _GLIBCXX_SIMD_ALWAYS_INLINE are redefined to a
// plain `inline`, dropping their attributes.
#if defined(_GLIBCXX_SIMD_NO_ALWAYS_INLINE)
#  undef _GLIBCXX_SIMD_INTRINSIC
#  define _GLIBCXX_SIMD_INTRINSIC inline
#  undef _GLIBCXX_SIMD_ALWAYS_INLINE
#  define _GLIBCXX_SIMD_ALWAYS_INLINE inline
#endif
259 
// _GLIBCXX_SIMD_X86INTRIN: 1 when either MMX or SSE was detected, otherwise 0.
#if _GLIBCXX_SIMD_HAVE_MMX || _GLIBCXX_SIMD_HAVE_SSE
#  define _GLIBCXX_SIMD_X86INTRIN 1
#else
#  define _GLIBCXX_SIMD_X86INTRIN 0
#endif
265 
// workaround macros {{{

// --- enabled on every target ---

// Use aliasing loads to help GCC understand the data accesses better.
// This also seems to hide a miscompilation on swap(x[i], x[i + 1]) with
// fixed_size_simd<float, 16> x.
#define _GLIBCXX_SIMD_USE_ALIASING_LOADS 1

// PR90993: integer division not optimized.
#define _GLIBCXX_SIMD_WORKAROUND_PR90993 1

// PR90424: bad codegen for 8 Byte memcpy to __vector_type_t<char, 16>.
#define _GLIBCXX_SIMD_WORKAROUND_PR90424 1

// Incorrect return type of static_simd_cast, see
// https://github.com/cplusplus/parallelism-ts/issues/65
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 1

// Incorrect SFINAE constraint on (static)_simd_cast, see
// https://github.com/cplusplus/parallelism-ts/issues/66
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 1

// --- enabled only when _GLIBCXX_SIMD_X86INTRIN is non-zero; left undefined
// --- otherwise
#if _GLIBCXX_SIMD_X86INTRIN

// PR85048: vector conversions on x86 not optimized.
#define _GLIBCXX_SIMD_WORKAROUND_PR85048 1

// Very bad codegen for extraction and concatenation of 128/256
// "subregisters" with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM
#define _GLIBCXX_SIMD_WORKAROUND_XXX_1 1

// Bad codegen for zero-extend using simple concat(__x, 0).
#define _GLIBCXX_SIMD_WORKAROUND_XXX_3 1

#endif
// }}}
302 
303 #endif // __cplusplus >= 201703L
304 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
305 
306 // vim: foldmethod=marker
307