/*
 * Copyright 2010-2019 Branimir Karadzic. All rights reserved.
 * License: https://github.com/bkaradzic/bx#license-bsd-2-clause
 */

// SIMD front-end header.
//
// This file:
//   1. selects the SIMD backend(s) from compiler predefined macros,
//   2. declares the generic `template<typename Ty>` SIMD function set,
//   3. declares the backend vector types and pulls in the backend .inl files,
//   4. declares the reference (plain array) vector types.
//
// Only declarations live here; the definitions are expected to come from the
// "inline/*.inl" files included below (their contents are not visible from
// this header).

#ifndef BX_SIMD_T_H_HEADER_GUARD
#define BX_SIMD_T_H_HEADER_GUARD

#include "bx.h"

#define BX_SIMD_FORCE_INLINE BX_FORCE_INLINE
#define BX_SIMD_INLINE inline

// Backend feature switches. Each one defaults to 0 and is redefined to 1
// further down when the matching backend is selected.
#define BX_SIMD_AVX     0
#define BX_SIMD_LANGEXT 0
#define BX_SIMD_NEON    0
#define BX_SIMD_SSE     0

// Redefined to 1 later in this file when at least one backend above is enabled.
#define BX_CONFIG_SUPPORTS_SIMD 0

// AVX is detected independently of the 128-bit backend chain below, so it can
// be enabled together with one of them.
#if defined(__AVX__) || defined(__AVX2__)
#	include <immintrin.h>
#	undef  BX_SIMD_AVX
#	define BX_SIMD_AVX 1
#endif // defined(__AVX__) || defined(__AVX2__)

// 128-bit backend selection. This is a single #if/#elif chain, so at most one
// of BX_SIMD_SSE, BX_SIMD_NEON and BX_SIMD_LANGEXT ends up set to 1.
#if defined(__SSE2__) || (BX_COMPILER_MSVC && (BX_ARCH_64BIT || _M_IX86_FP >= 2) )
#	include <emmintrin.h> // __m128i
#	if defined(__SSE4_1__)
#		include <smmintrin.h>
#	endif // defined(__SSE4_1__)
#	include <xmmintrin.h> // __m128
#	undef  BX_SIMD_SSE
#	define BX_SIMD_SSE 1
#elif defined(__ARM_NEON__) && !BX_COMPILER_CLANG
#	include <arm_neon.h>
#	undef  BX_SIMD_NEON
#	define BX_SIMD_NEON 1
#elif BX_COMPILER_CLANG \
	&& !BX_PLATFORM_EMSCRIPTEN \
	&& !BX_PLATFORM_IOS \
	&& BX_CLANG_HAS_EXTENSION(attribute_ext_vector_type)
#	undef  BX_SIMD_LANGEXT
#	define BX_SIMD_LANGEXT 1
#endif // 128-bit backend selection

namespace bx
{
	// Swizzle declarations.
	//
	// BX_SIMD128_IMPLEMENT_SWIZZLE(_x, _y, _z, _w) expands to the declaration of
	// `template<typename Ty> Ty simd_swiz_<_x><_y><_z><_w>(Ty _a)`.
	// NOTE(review): "inline/simd128_swizzle.inl" presumably invokes the macro
	// once per swizzle pattern and may use the ELEMx..ELEMw indices (0..3)
	// defined here -- confirm against that file.
#define ELEMx 0
#define ELEMy 1
#define ELEMz 2
#define ELEMw 3
#define BX_SIMD128_IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
			template<typename Ty> \
			Ty simd_swiz_##_x##_y##_z##_w(Ty _a);
#include "inline/simd128_swizzle.inl"

#undef BX_SIMD128_IMPLEMENT_SWIZZLE
#undef ELEMw
#undef ELEMz
#undef ELEMy
#undef ELEMx

	// Test declarations.
	//
	// BX_SIMD128_IMPLEMENT_TEST(_xyzw) declares the pair
	// `bool simd_test_any_<_xyzw>(Ty)` and `bool simd_test_all_<_xyzw>(Ty)`.
	// The second declaration has no trailing semicolon inside the macro; it is
	// supplied by the `;` at each invocation site below.
	// The macro is invoked for all 15 non-empty combinations of x, y, z and w.
#define BX_SIMD128_IMPLEMENT_TEST(_xyzw) \
			template<typename Ty> \
			BX_SIMD_FORCE_INLINE bool simd_test_any_##_xyzw(Ty _test); \
			\
			template<typename Ty> \
			BX_SIMD_FORCE_INLINE bool simd_test_all_##_xyzw(Ty _test)

BX_SIMD128_IMPLEMENT_TEST(x   );
BX_SIMD128_IMPLEMENT_TEST(y   );
BX_SIMD128_IMPLEMENT_TEST(xy  );
BX_SIMD128_IMPLEMENT_TEST(z   );
BX_SIMD128_IMPLEMENT_TEST(xz  );
BX_SIMD128_IMPLEMENT_TEST(yz  );
BX_SIMD128_IMPLEMENT_TEST(xyz );
BX_SIMD128_IMPLEMENT_TEST(w   );
BX_SIMD128_IMPLEMENT_TEST(xw  );
BX_SIMD128_IMPLEMENT_TEST(yw  );
BX_SIMD128_IMPLEMENT_TEST(xyw );
BX_SIMD128_IMPLEMENT_TEST(zw  );
BX_SIMD128_IMPLEMENT_TEST(xzw );
BX_SIMD128_IMPLEMENT_TEST(yzw );
BX_SIMD128_IMPLEMENT_TEST(xyzw);
#undef BX_SIMD128_IMPLEMENT_TEST

	// ------------------------------------------------------------------
	// Generic SIMD function set.
	//
	// Everything below is a declaration of a function template parameterized
	// on the vector type `Ty`; no definitions appear in this header.
	// NOTE(review): specializations/definitions presumably live in the backend
	// .inl files included after this namespace -- confirm.
	// ------------------------------------------------------------------

	// Two-operand shuffles.
	// NOTE(review): the suffix presumably names the result lanes, lowercase
	// taken from _a (x,y,z,w) and uppercase from _b (A,B,C,D) -- confirm.
	template<typename Ty>
	Ty simd_shuf_xyAB(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_shuf_ABxy(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_shuf_CDzw(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_shuf_zwCD(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_shuf_xAyB(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_shuf_AxBy(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_shuf_zCwD(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_shuf_CzDw(Ty _a, Ty _b);

	// Scalar accessors: each returns a single float derived from _a
	// (presumably the x/y/z/w lane -- confirm).
	template<typename Ty>
	float simd_x(Ty _a);

	template<typename Ty>
	float simd_y(Ty _a);

	template<typename Ty>
	float simd_z(Ty _a);

	template<typename Ty>
	float simd_w(Ty _a);

	// Load / store through untyped pointers.
	// NOTE(review): alignment requirements of _ptr are not stated here; check
	// the backend implementation before passing unaligned memory.
	template<typename Ty>
	Ty simd_ld(const void* _ptr);

	template<typename Ty>
	void simd_st(void* _ptr, Ty _a);

	template<typename Ty>
	void simd_stx(void* _ptr, Ty _a);

	template<typename Ty>
	void simd_stream(void* _ptr, Ty _a);

	// Construction from scalars. The 4-argument overloads take one value per
	// component; the 8-argument overloads take eight (presumably for 256-bit
	// vector types -- confirm). `simd_ild` takes uint32_t values instead of
	// floats.
	template<typename Ty>
	Ty simd_ld(float _x, float _y, float _z, float _w);

	template<typename Ty>
	Ty simd_ld(float _x, float _y, float _z, float _w, float _a, float _b, float _c, float _d);

	template<typename Ty>
	Ty simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w);

	template<typename Ty>
	Ty simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w, uint32_t _a, uint32_t _b, uint32_t _c, uint32_t _d);

	// Construction from a single value (pointer, float or uint32_t) and the
	// no-argument simd_zero().
	template<typename Ty>
	Ty simd_splat(const void* _ptr);

	template<typename Ty>
	Ty simd_splat(float _a);

	template<typename Ty>
	Ty simd_isplat(uint32_t _a);

	template<typename Ty>
	Ty simd_zero();

	// Conversions and rounding (one operand).
	template<typename Ty>
	Ty simd_itof(Ty _a);

	template<typename Ty>
	Ty simd_ftoi(Ty _a);

	template<typename Ty>
	Ty simd_round(Ty _a);

	// Arithmetic.
	template<typename Ty>
	Ty simd_add(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_sub(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_mul(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_div(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_rcp_est(Ty _a);

	template<typename Ty>
	Ty simd_sqrt(Ty _a);

	template<typename Ty>
	Ty simd_rsqrt_est(Ty _a);

	template<typename Ty>
	Ty simd_dot3(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_dot(Ty _a, Ty _b);

	// Comparisons. Note that the result type is the vector type Ty, not bool.
	template<typename Ty>
	Ty simd_cmpeq(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_cmplt(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_cmple(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_cmpgt(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_cmpge(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_min(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_max(Ty _a, Ty _b);

	// Bitwise operations.
	template<typename Ty>
	Ty simd_and(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_andc(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_or(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_xor(Ty _a, Ty _b);

	// Shifts by a scalar count.
	template<typename Ty>
	Ty simd_sll(Ty _a, int _count);

	template<typename Ty>
	Ty simd_srl(Ty _a, int _count);

	template<typename Ty>
	Ty simd_sra(Ty _a, int _count);

	// `i`-prefixed operations (presumably integer-lane variants -- confirm).
	template<typename Ty>
	Ty simd_icmpeq(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_icmplt(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_icmpgt(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_imin(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_imax(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_iadd(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_isub(Ty _a, Ty _b);

	// Further operations. Every function from here up to the `_ni` group has
	// a same-named `_ni` counterpart declared below.
	template<typename Ty>
	Ty simd_shuf_xAzC(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_shuf_yBwD(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_rcp(Ty _a);

	template<typename Ty>
	Ty simd_orx(Ty _a);

	template<typename Ty>
	Ty simd_orc(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_neg(Ty _a);

	template<typename Ty>
	Ty simd_madd(Ty _a, Ty _b, Ty _c);

	template<typename Ty>
	Ty simd_nmsub(Ty _a, Ty _b, Ty _c);

	template<typename Ty>
	Ty simd_div_nr(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_selb(Ty _mask, Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_sels(Ty _test, Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_not(Ty _a);

	template<typename Ty>
	Ty simd_abs(Ty _a);

	template<typename Ty>
	Ty simd_clamp(Ty _a, Ty _min, Ty _max);

	template<typename Ty>
	Ty simd_lerp(Ty _a, Ty _b, Ty _s);

	template<typename Ty>
	Ty simd_rsqrt(Ty _a);

	template<typename Ty>
	Ty simd_rsqrt_nr(Ty _a);

	template<typename Ty>
	Ty simd_rsqrt_carmack(Ty _a);

	template<typename Ty>
	Ty simd_sqrt_nr(Ty _a);

	template<typename Ty>
	Ty simd_log2(Ty _a);

	template<typename Ty>
	Ty simd_exp2(Ty _a);

	template<typename Ty>
	Ty simd_pow(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_cross3(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_normalize3(Ty _a);

	template<typename Ty>
	Ty simd_ceil(Ty _a);

	template<typename Ty>
	Ty simd_floor(Ty _a);

	// `_ni` variants.
	// NOTE(review): these are presumably the backend-independent fallbacks
	// defined in "inline/simd_ni.inl" (included at the end of this file), and
	// the un-suffixed functions above presumably forward to them when a
	// backend has no native version -- confirm against the .inl files.
	template<typename Ty>
	Ty simd_shuf_xAzC_ni(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_shuf_yBwD_ni(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_madd_ni(Ty _a, Ty _b, Ty _c);

	template<typename Ty>
	Ty simd_nmsub_ni(Ty _a, Ty _b, Ty _c);

	template<typename Ty>
	Ty simd_div_nr_ni(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_rcp_ni(Ty _a);

	template<typename Ty>
	Ty simd_orx_ni(Ty _a);

	template<typename Ty>
	Ty simd_orc_ni(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_neg_ni(Ty _a);

	template<typename Ty>
	Ty simd_selb_ni(Ty _mask, Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_sels_ni(Ty _test, Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_not_ni(Ty _a);

	template<typename Ty>
	Ty simd_min_ni(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_max_ni(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_abs_ni(Ty _a);

	template<typename Ty>
	Ty simd_imin_ni(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_imax_ni(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_clamp_ni(Ty _a, Ty _min, Ty _max);

	template<typename Ty>
	Ty simd_lerp_ni(Ty _a, Ty _b, Ty _s);

	template<typename Ty>
	Ty simd_sqrt_nr_ni(Ty _a);

	template<typename Ty>
	Ty simd_sqrt_nr1_ni(Ty _a);

	template<typename Ty>
	Ty simd_rsqrt_ni(Ty _a);

	template<typename Ty>
	Ty simd_rsqrt_nr_ni(Ty _a);

	template<typename Ty>
	Ty simd_rsqrt_carmack_ni(Ty _a);

	template<typename Ty>
	Ty simd_log2_ni(Ty _a);

	template<typename Ty>
	Ty simd_exp2_ni(Ty _a);

	template<typename Ty>
	Ty simd_pow_ni(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_dot3_ni(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_cross3_ni(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_normalize3_ni(Ty _a);

	template<typename Ty>
	Ty simd_dot_ni(Ty _a, Ty _b);

	template<typename Ty>
	Ty simd_ceil_ni(Ty _a);

	template<typename Ty>
	Ty simd_floor_ni(Ty _a);

	template<typename Ty>
	Ty simd_round_ni(Ty _a);

	template<typename Ty>
	bool simd_test_any_ni(Ty _a);

	template<typename Ty>
	bool simd_test_all_ni(Ty _a);

	// ------------------------------------------------------------------
	// Backend vector types. Each one exists only when its backend switch
	// was set to 1 by the detection logic at the top of this file.
	// ------------------------------------------------------------------

#if BX_SIMD_AVX
	typedef __m256 simd256_avx_t;
#endif // BX_SIMD_AVX

#if BX_SIMD_LANGEXT
	// Compiler vector-extension backend: three 16-byte vector views plus three
	// 4-element array views over the same storage.
	union simd128_langext_t
	{
		float    __attribute__((vector_size(16))) vf;
		int32_t  __attribute__((vector_size(16))) vi;
		uint32_t __attribute__((vector_size(16))) vu;
		float    fxyzw[4];
		int32_t  ixyzw[4];
		uint32_t uxyzw[4];

	};
#endif // BX_SIMD_LANGEXT

#if BX_SIMD_NEON
	typedef float32x4_t simd128_neon_t;
#endif // BX_SIMD_NEON

#if BX_SIMD_SSE
	typedef __m128 simd128_sse_t;
#endif // BX_SIMD_SSE

} // namespace bx

// Backend implementations. These are included outside of `namespace bx`.
// NOTE(review): each 128-bit backend .inl presumably also provides the
// `simd128_t` typedef (and the AVX one `simd256_t`), since this header only
// defines them for the reference fallback -- confirm.
#if BX_SIMD_AVX
#	include "inline/simd256_avx.inl"
#endif // BX_SIMD_AVX

#if BX_SIMD_LANGEXT
#	include "inline/simd128_langext.inl"
#endif // BX_SIMD_LANGEXT

#if BX_SIMD_NEON
#	include "inline/simd128_neon.inl"
#endif // BX_SIMD_NEON

#if BX_SIMD_SSE
#	include "inline/simd128_sse.inl"
#endif // BX_SIMD_SSE

// Report SIMD support if any backend (128-bit or AVX) was enabled.
#if (  BX_SIMD_LANGEXT \
	|| BX_SIMD_NEON \
	|| BX_SIMD_SSE \
	|| BX_SIMD_AVX \
	)
#	undef  BX_CONFIG_SUPPORTS_SIMD
#	define BX_CONFIG_SUPPORTS_SIMD 1
#endif // BX_SIMD_*

namespace bx
{
	// Reference 128-bit vector: float, int32_t and uint32_t 4-element array
	// views over the same storage. Always declared, regardless of backend.
	union simd128_ref_t
	{
		float    fxyzw[4];
		int32_t  ixyzw[4];
		uint32_t uxyzw[4];
	};

	// Opt-in build diagnostics: define BX_SIMD_WARN_REFERENCE_IMPL to a
	// non-zero value before including this header to get a #pragma message
	// whenever a reference implementation is selected. Defaults to off.
#ifndef BX_SIMD_WARN_REFERENCE_IMPL
#	define BX_SIMD_WARN_REFERENCE_IMPL 0
#endif // BX_SIMD_WARN_REFERENCE_IMPL

	// With no backend at all, the reference union becomes simd128_t.
#if !BX_CONFIG_SUPPORTS_SIMD
#	if BX_SIMD_WARN_REFERENCE_IMPL
#		pragma message("*** Using SIMD128 reference implementation! ***")
#	endif // BX_SIMD_WARN_REFERENCE_IMPL

	typedef simd128_ref_t simd128_t;
#endif // !BX_CONFIG_SUPPORTS_SIMD

	// Reference 256-bit vector built from two 128-bit halves.
	// On MSVC the halves are always simd128_ref_t; elsewhere they are
	// whatever simd128_t resolved to.
	// NOTE(review): the reason for the MSVC special case is not visible in
	// this file -- confirm before changing it.
	struct simd256_ref_t
	{
#if BX_COMPILER_MSVC
		typedef simd128_ref_t type;
#else
		typedef simd128_t type;
#endif // BX_COMPILER_MSVC

		type simd128_0;
		type simd128_1;
	};

	// Without AVX, the reference struct becomes simd256_t.
#if !BX_SIMD_AVX
#	if BX_SIMD_WARN_REFERENCE_IMPL
#		pragma message("*** Using SIMD256 reference implementation! ***")
#	endif // BX_SIMD_WARN_REFERENCE_IMPL

	typedef simd256_ref_t simd256_t;
#endif // !BX_SIMD_AVX

	// Non-template convenience overloads that return the selected simd128_t,
	// so callers do not have to spell out the vector type.
	simd128_t simd_zero();

	simd128_t simd_ld(const void* _ptr);

	simd128_t simd_ld(float _x, float _y, float _z, float _w);

	simd128_t simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w);

	simd128_t simd_splat(const void* _ptr);

	simd128_t simd_splat(float _a);

	simd128_t simd_isplat(uint32_t _a);

} // namespace bx

// Reference implementations and `_ni` definitions are always included, after
// every type above has been declared.
#include "inline/simd128_ref.inl"
#include "inline/simd256_ref.inl"

#include "inline/simd_ni.inl"

#endif // BX_SIMD_T_H_HEADER_GUARD