// Internal macros for the simd implementation -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_

#if __cplusplus >= 201703L

#include <cstddef>
#include <cstdint>

// Open/close std::experimental::parallelism_v2 (an inline namespace),
// including the library's visibility and versioned-namespace decorations.
// Every _GLIBCXX_SIMD_BEGIN_NAMESPACE must be paired with exactly one
// _GLIBCXX_SIMD_END_NAMESPACE.
#define _GLIBCXX_SIMD_BEGIN_NAMESPACE                                          \
  namespace std _GLIBCXX_VISIBILITY(default)                                   \
  {                                                                            \
    _GLIBCXX_BEGIN_NAMESPACE_VERSION                                           \
      namespace experimental {                                                 \
      inline namespace parallelism_v2 {
#define _GLIBCXX_SIMD_END_NAMESPACE                                            \
  }                                                                            \
  }                                                                            \
  _GLIBCXX_END_NAMESPACE_VERSION                                               \
  }

// ISA extension detection. The following defines all the
// _GLIBCXX_SIMD_HAVE_XXX macros. Each one is always defined, to either 1 or
// 0, so that it can be used directly in #if and in constant expressions.
// ARM{{{
#if defined __ARM_NEON
#define _GLIBCXX_SIMD_HAVE_NEON 1
#else
#define _GLIBCXX_SIMD_HAVE_NEON 0
#endif
// NEON with __ARM_ARCH >= 8, or NEON on AArch64.
#if defined __ARM_NEON && (__ARM_ARCH >= 8 || defined __aarch64__)
#define _GLIBCXX_SIMD_HAVE_NEON_A32 1
#else
#define _GLIBCXX_SIMD_HAVE_NEON_A32 0
#endif
// NEON on AArch64 only.
#if defined __ARM_NEON && defined __aarch64__
#define _GLIBCXX_SIMD_HAVE_NEON_A64 1
#else
#define _GLIBCXX_SIMD_HAVE_NEON_A64 0
#endif
//}}}
// x86{{{
#ifdef __MMX__
#define _GLIBCXX_SIMD_HAVE_MMX 1
#else
#define _GLIBCXX_SIMD_HAVE_MMX 0
#endif
// SSE and SSE2 are also reported whenever __x86_64__ is defined, even if the
// corresponding __SSE__/__SSE2__ macro is not.
#if defined __SSE__ || defined __x86_64__
#define _GLIBCXX_SIMD_HAVE_SSE 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE 0
#endif
#if defined __SSE2__ || defined __x86_64__
#define _GLIBCXX_SIMD_HAVE_SSE2 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE2 0
#endif
#ifdef __SSE3__
#define _GLIBCXX_SIMD_HAVE_SSE3 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE3 0
#endif
#ifdef __SSSE3__
#define _GLIBCXX_SIMD_HAVE_SSSE3 1
#else
#define _GLIBCXX_SIMD_HAVE_SSSE3 0
#endif
#ifdef __SSE4_1__
#define _GLIBCXX_SIMD_HAVE_SSE4_1 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE4_1 0
#endif
#ifdef __SSE4_2__
#define _GLIBCXX_SIMD_HAVE_SSE4_2 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE4_2 0
#endif
#ifdef __XOP__
#define _GLIBCXX_SIMD_HAVE_XOP 1
#else
#define _GLIBCXX_SIMD_HAVE_XOP 0
#endif
#ifdef __AVX__
#define _GLIBCXX_SIMD_HAVE_AVX 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX 0
#endif
#ifdef __AVX2__
#define _GLIBCXX_SIMD_HAVE_AVX2 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX2 0
#endif
// Note the naming: the compiler macro is __BMI__, the library macro is ..BMI1.
#ifdef __BMI__
#define _GLIBCXX_SIMD_HAVE_BMI1 1
#else
#define _GLIBCXX_SIMD_HAVE_BMI1 0
#endif
#ifdef __BMI2__
#define _GLIBCXX_SIMD_HAVE_BMI2 1
#else
#define _GLIBCXX_SIMD_HAVE_BMI2 0
#endif
#ifdef __LZCNT__
#define _GLIBCXX_SIMD_HAVE_LZCNT 1
#else
#define _GLIBCXX_SIMD_HAVE_LZCNT 0
#endif
#ifdef __SSE4A__
#define _GLIBCXX_SIMD_HAVE_SSE4A 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE4A 0
#endif
#ifdef __FMA__
#define _GLIBCXX_SIMD_HAVE_FMA 1
#else
#define _GLIBCXX_SIMD_HAVE_FMA 0
#endif
#ifdef __FMA4__
#define _GLIBCXX_SIMD_HAVE_FMA4 1
#else
#define _GLIBCXX_SIMD_HAVE_FMA4 0
#endif
#ifdef __F16C__
#define _GLIBCXX_SIMD_HAVE_F16C 1
#else
#define _GLIBCXX_SIMD_HAVE_F16C 0
#endif
#ifdef __POPCNT__
#define _GLIBCXX_SIMD_HAVE_POPCNT 1
#else
#define _GLIBCXX_SIMD_HAVE_POPCNT 0
#endif
#ifdef __AVX512F__
#define _GLIBCXX_SIMD_HAVE_AVX512F 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512F 0
#endif
#ifdef __AVX512DQ__
#define _GLIBCXX_SIMD_HAVE_AVX512DQ 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512DQ 0
#endif
#ifdef __AVX512VL__
#define _GLIBCXX_SIMD_HAVE_AVX512VL 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512VL 0
#endif
#ifdef __AVX512BW__
#define _GLIBCXX_SIMD_HAVE_AVX512BW 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512BW 0
#endif

// ABI availability. _GLIBCXX_SIMD_HAVE_<X>_ABI follows the base extension
// (SSE, AVX, AVX512F); _GLIBCXX_SIMD_HAVE_FULL_<X>_ABI follows the
// corresponding follow-up extension (SSE2, AVX2, AVX512BW).
#if _GLIBCXX_SIMD_HAVE_SSE
#define _GLIBCXX_SIMD_HAVE_SSE_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_SSE2
#define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 0
#endif

#if _GLIBCXX_SIMD_HAVE_AVX
#define _GLIBCXX_SIMD_HAVE_AVX_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_AVX2
#define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 0
#endif

#if _GLIBCXX_SIMD_HAVE_AVX512F
#define _GLIBCXX_SIMD_HAVE_AVX512_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_AVX512BW
#define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 0
#endif

#if defined __x86_64__ && !_GLIBCXX_SIMD_HAVE_SSE2
#error "Use of SSE2 is required on AMD64"
#endif
//}}}

// Function attribute that requests the "finite-math-only,no-signed-zeros"
// optimize options for a single function. Expands to nothing under Clang.
#ifdef __clang__
#define _GLIBCXX_SIMD_NORMAL_MATH
#else
#define _GLIBCXX_SIMD_NORMAL_MATH                                              \
  [[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]]
#endif
// Inlining control and branch-probability hints.
#define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]]
#define _GLIBCXX_SIMD_INTRINSIC                                                \
  [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline
#define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline
#define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
#define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)

// When __STRICT_ANSI__ is defined and nonzero, _GLIBCXX_SIMD_CONSTEXPR expands
// to nothing and _GLIBCXX_SIMD_USE_CONSTEXPR_API degrades to const; otherwise
// both expand to constexpr.
#if defined __STRICT_ANSI__ && __STRICT_ANSI__
#define _GLIBCXX_SIMD_CONSTEXPR
#define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
#else
#define _GLIBCXX_SIMD_CONSTEXPR constexpr
#define _GLIBCXX_SIMD_USE_CONSTEXPR_API constexpr
#endif

// const under Clang, constexpr with every other compiler.
#if defined __clang__
#define _GLIBCXX_SIMD_USE_CONSTEXPR const
#else
#define _GLIBCXX_SIMD_USE_CONSTEXPR constexpr
#endif

// X-macro lists: invoke __macro once per operator token.
#define _GLIBCXX_SIMD_LIST_BINARY(__macro) __macro(|) __macro(&) __macro(^)
#define _GLIBCXX_SIMD_LIST_SHIFTS(__macro) __macro(<<) __macro(>>)
#define _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro)                                \
  __macro(+) __macro(-) __macro(*) __macro(/) __macro(%)

// Same lists, followed by static_assert(true); the expansion therefore ends
// in a declaration that the use site completes with a semicolon.
#define _GLIBCXX_SIMD_ALL_BINARY(__macro)                                      \
  _GLIBCXX_SIMD_LIST_BINARY(__macro) static_assert(true)
#define _GLIBCXX_SIMD_ALL_SHIFTS(__macro)                                      \
  _GLIBCXX_SIMD_LIST_SHIFTS(__macro) static_assert(true)
#define _GLIBCXX_SIMD_ALL_ARITHMETICS(__macro)                                 \
  _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) static_assert(true)

// Opt-out: defining _GLIBCXX_SIMD_NO_ALWAYS_INLINE before this header is
// included reduces both inlining macros to a plain `inline`.
#ifdef _GLIBCXX_SIMD_NO_ALWAYS_INLINE
#undef _GLIBCXX_SIMD_ALWAYS_INLINE
#define _GLIBCXX_SIMD_ALWAYS_INLINE inline
#undef _GLIBCXX_SIMD_INTRINSIC
#define _GLIBCXX_SIMD_INTRINSIC inline
#endif

// 1 if either SSE or MMX was detected above, 0 otherwise.
#if _GLIBCXX_SIMD_HAVE_SSE || _GLIBCXX_SIMD_HAVE_MMX
#define _GLIBCXX_SIMD_X86INTRIN 1
#else
#define _GLIBCXX_SIMD_X86INTRIN 0
#endif

// workaround macros {{{
// Unlike the _GLIBCXX_SIMD_HAVE_XXX macros, the x86-only workaround macros
// below are left undefined (not defined to 0) when they do not apply.

// use aliasing loads to help GCC understand the data accesses better
// This also seems to hide a miscompilation on swap(x[i], x[i + 1]) with
// fixed_size_simd<float, 16> x.
#define _GLIBCXX_SIMD_USE_ALIASING_LOADS 1

// vector conversions on x86 not optimized:
#if _GLIBCXX_SIMD_X86INTRIN
#define _GLIBCXX_SIMD_WORKAROUND_PR85048 1
#endif

// integer division not optimized
#define _GLIBCXX_SIMD_WORKAROUND_PR90993 1

// very bad codegen for extraction and concatenation of 128/256 "subregisters"
// with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM
#if _GLIBCXX_SIMD_X86INTRIN
#define _GLIBCXX_SIMD_WORKAROUND_XXX_1 1
#endif

// bad codegen for 8 Byte memcpy to __vector_type_t<char, 16>
#define _GLIBCXX_SIMD_WORKAROUND_PR90424 1

// bad codegen for zero-extend using simple concat(__x, 0)
#if _GLIBCXX_SIMD_X86INTRIN
#define _GLIBCXX_SIMD_WORKAROUND_XXX_3 1
#endif

// https://github.com/cplusplus/parallelism-ts/issues/65 (incorrect return type
// of static_simd_cast)
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 1

// https://github.com/cplusplus/parallelism-ts/issues/66 (incorrect SFINAE
// constraint on (static)_simd_cast)
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 1
// }}}

#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_

// vim: foldmethod=marker