/*
 * Copyright 2010-2019 Branimir Karadzic. All rights reserved.
 * License: https://github.com/bkaradzic/bx#license-bsd-2-clause
 */

#ifndef BX_SIMD_T_H_HEADER_GUARD
#	error "Must be included from bx/simd_t.h!"
#endif // BX_SIMD_T_H_HEADER_GUARD

// Scalar (non-SIMD) reference implementation of the simd128_* API for
// simd128_ref_t. Every operation processes the four lanes one at a time
// through the fxyzw/ixyzw/uxyzw union views of the vector.
namespace bx
{
	// Scalar math helpers (defined elsewhere in bx).
	BX_CONST_FUNC float sqrt(float);
	BX_CONST_FUNC float rsqrt(float);

// Map component letters to lane indices for the swizzle generator below.
#define ELEMx 0
#define ELEMy 1
#define ELEMz 2
#define ELEMw 3

// Expands to a specialization simd_swiz_<permutation>(_a) that copies the
// selected source lanes of _a into lanes 0..3 of the result.
#define BX_SIMD128_IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
			template<> \
			BX_SIMD_FORCE_INLINE simd128_ref_t simd_swiz_##_x##_y##_z##_w(simd128_ref_t _a) \
			{ \
				simd128_ref_t result; \
				result.ixyzw[0] = _a.ixyzw[ELEM##_x]; \
				result.ixyzw[1] = _a.ixyzw[ELEM##_y]; \
				result.ixyzw[2] = _a.ixyzw[ELEM##_z]; \
				result.ixyzw[3] = _a.ixyzw[ELEM##_w]; \
				return result; \
			}

// Instantiates BX_SIMD128_IMPLEMENT_SWIZZLE for the swizzle permutations
// listed in simd128_swizzle.inl.
#include "simd128_swizzle.inl"

#undef BX_SIMD128_IMPLEMENT_SWIZZLE
#undef ELEMw
#undef ELEMz
#undef ELEMy
#undef ELEMx

// Expands to simd_test_any_<c>/simd_test_all_<c>: gathers the four lane sign
// bits into a nibble (x = bit 0 .. w = bit 3), then tests it against _mask —
// "any" succeeds if at least one masked sign bit is set, "all" only if every
// masked sign bit is set.
#define BX_SIMD128_IMPLEMENT_TEST(_xyzw, _mask) \
			template<> \
			BX_SIMD_FORCE_INLINE bool simd_test_any_##_xyzw(simd128_ref_t _test) \
			{ \
				uint32_t tmp = ( (_test.uxyzw[3]>>31)<<3) \
				             | ( (_test.uxyzw[2]>>31)<<2) \
				             | ( (_test.uxyzw[1]>>31)<<1) \
				             | (  _test.uxyzw[0]>>31) \
				             ; \
				return 0 != (tmp&(_mask) ); \
			} \
			\
			template<> \
			BX_SIMD_FORCE_INLINE bool simd_test_all_##_xyzw(simd128_ref_t _test) \
			{ \
				uint32_t tmp = ( (_test.uxyzw[3]>>31)<<3) \
				             | ( (_test.uxyzw[2]>>31)<<2) \
				             | ( (_test.uxyzw[1]>>31)<<1) \
				             | (  _test.uxyzw[0]>>31) \
				             ; \
				return (_mask) == (tmp&(_mask) ); \
			}

BX_SIMD128_IMPLEMENT_TEST(x    , 0x1);
BX_SIMD128_IMPLEMENT_TEST(y    , 0x2);
BX_SIMD128_IMPLEMENT_TEST(xy   , 0x3);
BX_SIMD128_IMPLEMENT_TEST(z    , 0x4);
BX_SIMD128_IMPLEMENT_TEST(xz   , 0x5);
BX_SIMD128_IMPLEMENT_TEST(yz   , 0x6);
BX_SIMD128_IMPLEMENT_TEST(xyz  , 0x7);
BX_SIMD128_IMPLEMENT_TEST(w    , 0x8);
BX_SIMD128_IMPLEMENT_TEST(xw   , 0x9);
BX_SIMD128_IMPLEMENT_TEST(yw   , 0xa);
72BX_SIMD128_IMPLEMENT_TEST(xyw , 0xb); 73BX_SIMD128_IMPLEMENT_TEST(zw , 0xc); 74BX_SIMD128_IMPLEMENT_TEST(xzw , 0xd); 75BX_SIMD128_IMPLEMENT_TEST(yzw , 0xe); 76BX_SIMD128_IMPLEMENT_TEST(xyzw , 0xf); 77 78#undef BX_SIMD128_IMPLEMENT_TEST 79 80 template<> 81 BX_SIMD_FORCE_INLINE simd128_ref_t simd_shuf_xyAB(simd128_ref_t _a, simd128_ref_t _b) 82 { 83 simd128_ref_t result; 84 result.uxyzw[0] = _a.uxyzw[0]; 85 result.uxyzw[1] = _a.uxyzw[1]; 86 result.uxyzw[2] = _b.uxyzw[0]; 87 result.uxyzw[3] = _b.uxyzw[1]; 88 return result; 89 } 90 91 template<> 92 BX_SIMD_FORCE_INLINE simd128_ref_t simd_shuf_ABxy(simd128_ref_t _a, simd128_ref_t _b) 93 { 94 simd128_ref_t result; 95 result.uxyzw[0] = _b.uxyzw[0]; 96 result.uxyzw[1] = _b.uxyzw[1]; 97 result.uxyzw[2] = _a.uxyzw[0]; 98 result.uxyzw[3] = _a.uxyzw[1]; 99 return result; 100 } 101 102 template<> 103 BX_SIMD_FORCE_INLINE simd128_ref_t simd_shuf_CDzw(simd128_ref_t _a, simd128_ref_t _b) 104 { 105 simd128_ref_t result; 106 result.uxyzw[0] = _b.uxyzw[2]; 107 result.uxyzw[1] = _b.uxyzw[3]; 108 result.uxyzw[2] = _a.uxyzw[2]; 109 result.uxyzw[3] = _a.uxyzw[3]; 110 return result; 111 } 112 113 template<> 114 BX_SIMD_FORCE_INLINE simd128_ref_t simd_shuf_zwCD(simd128_ref_t _a, simd128_ref_t _b) 115 { 116 simd128_ref_t result; 117 result.uxyzw[0] = _a.uxyzw[2]; 118 result.uxyzw[1] = _a.uxyzw[3]; 119 result.uxyzw[2] = _b.uxyzw[2]; 120 result.uxyzw[3] = _b.uxyzw[3]; 121 return result; 122 } 123 124 template<> 125 BX_SIMD_FORCE_INLINE simd128_ref_t simd_shuf_xAyB(simd128_ref_t _a, simd128_ref_t _b) 126 { 127 simd128_ref_t result; 128 result.uxyzw[0] = _a.uxyzw[0]; 129 result.uxyzw[1] = _b.uxyzw[0]; 130 result.uxyzw[2] = _a.uxyzw[1]; 131 result.uxyzw[3] = _b.uxyzw[1]; 132 return result; 133 } 134 135 template<> 136 BX_SIMD_FORCE_INLINE simd128_ref_t simd_shuf_AxBy(simd128_ref_t _a, simd128_ref_t _b) 137 { 138 simd128_ref_t result; 139 result.uxyzw[0] = _a.uxyzw[1]; 140 result.uxyzw[1] = _b.uxyzw[1]; 141 result.uxyzw[2] = _a.uxyzw[0]; 142 
result.uxyzw[3] = _b.uxyzw[0]; 143 return result; 144 } 145 146 template<> 147 BX_SIMD_FORCE_INLINE simd128_ref_t simd_shuf_zCwD(simd128_ref_t _a, simd128_ref_t _b) 148 { 149 simd128_ref_t result; 150 result.uxyzw[0] = _a.uxyzw[2]; 151 result.uxyzw[1] = _b.uxyzw[2]; 152 result.uxyzw[2] = _a.uxyzw[3]; 153 result.uxyzw[3] = _b.uxyzw[3]; 154 return result; 155 } 156 157 template<> 158 BX_SIMD_FORCE_INLINE simd128_ref_t simd_shuf_CzDw(simd128_ref_t _a, simd128_ref_t _b) 159 { 160 simd128_ref_t result; 161 result.uxyzw[0] = _b.uxyzw[2]; 162 result.uxyzw[1] = _a.uxyzw[2]; 163 result.uxyzw[2] = _b.uxyzw[3]; 164 result.uxyzw[3] = _a.uxyzw[3]; 165 return result; 166 } 167 168 template<> 169 BX_SIMD_FORCE_INLINE float simd_x(simd128_ref_t _a) 170 { 171 return _a.fxyzw[0]; 172 } 173 174 template<> 175 BX_SIMD_FORCE_INLINE float simd_y(simd128_ref_t _a) 176 { 177 return _a.fxyzw[1]; 178 } 179 180 template<> 181 BX_SIMD_FORCE_INLINE float simd_z(simd128_ref_t _a) 182 { 183 return _a.fxyzw[2]; 184 } 185 186 template<> 187 BX_SIMD_FORCE_INLINE float simd_w(simd128_ref_t _a) 188 { 189 return _a.fxyzw[3]; 190 } 191 192 template<> 193 BX_SIMD_FORCE_INLINE simd128_ref_t simd_ld(const void* _ptr) 194 { 195 const uint32_t* input = reinterpret_cast<const uint32_t*>(_ptr); 196 simd128_ref_t result; 197 result.uxyzw[0] = input[0]; 198 result.uxyzw[1] = input[1]; 199 result.uxyzw[2] = input[2]; 200 result.uxyzw[3] = input[3]; 201 return result; 202 } 203 204 template<> 205 BX_SIMD_FORCE_INLINE void simd_st(void* _ptr, simd128_ref_t _a) 206 { 207 uint32_t* result = reinterpret_cast<uint32_t*>(_ptr); 208 result[0] = _a.uxyzw[0]; 209 result[1] = _a.uxyzw[1]; 210 result[2] = _a.uxyzw[2]; 211 result[3] = _a.uxyzw[3]; 212 } 213 214 template<> 215 BX_SIMD_FORCE_INLINE void simd_stx(void* _ptr, simd128_ref_t _a) 216 { 217 uint32_t* result = reinterpret_cast<uint32_t*>(_ptr); 218 result[0] = _a.uxyzw[0]; 219 } 220 221 template<> 222 BX_SIMD_FORCE_INLINE void simd_stream(void* _ptr, simd128_ref_t 
_a) 223 { 224 uint32_t* result = reinterpret_cast<uint32_t*>(_ptr); 225 result[0] = _a.uxyzw[0]; 226 result[1] = _a.uxyzw[1]; 227 result[2] = _a.uxyzw[2]; 228 result[3] = _a.uxyzw[3]; 229 } 230 231 template<> 232 BX_SIMD_FORCE_INLINE simd128_ref_t simd_ld(float _x, float _y, float _z, float _w) 233 { 234 simd128_ref_t result; 235 result.fxyzw[0] = _x; 236 result.fxyzw[1] = _y; 237 result.fxyzw[2] = _z; 238 result.fxyzw[3] = _w; 239 return result; 240 } 241 242 template<> 243 BX_SIMD_FORCE_INLINE simd128_ref_t simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w) 244 { 245 simd128_ref_t result; 246 result.uxyzw[0] = _x; 247 result.uxyzw[1] = _y; 248 result.uxyzw[2] = _z; 249 result.uxyzw[3] = _w; 250 return result; 251 } 252 253 template<> 254 BX_SIMD_FORCE_INLINE simd128_ref_t simd_splat(const void* _ptr) 255 { 256 const uint32_t val = *reinterpret_cast<const uint32_t*>(_ptr); 257 simd128_ref_t result; 258 result.uxyzw[0] = val; 259 result.uxyzw[1] = val; 260 result.uxyzw[2] = val; 261 result.uxyzw[3] = val; 262 return result; 263 } 264 265 template<> 266 BX_SIMD_FORCE_INLINE simd128_ref_t simd_splat(float _a) 267 { 268 return simd_ld<simd128_ref_t>(_a, _a, _a, _a); 269 } 270 271 template<> 272 BX_SIMD_FORCE_INLINE simd128_ref_t simd_isplat(uint32_t _a) 273 { 274 return simd_ild<simd128_ref_t>(_a, _a, _a, _a); 275 } 276 277 template<> 278 BX_SIMD_FORCE_INLINE simd128_ref_t simd_zero() 279 { 280 return simd_ild<simd128_ref_t>(0, 0, 0, 0); 281 } 282 283 template<> 284 BX_SIMD_FORCE_INLINE simd128_ref_t simd_itof(simd128_ref_t _a) 285 { 286 simd128_ref_t result; 287 result.fxyzw[0] = (float)_a.ixyzw[0]; 288 result.fxyzw[1] = (float)_a.ixyzw[1]; 289 result.fxyzw[2] = (float)_a.ixyzw[2]; 290 result.fxyzw[3] = (float)_a.ixyzw[3]; 291 return result; 292 } 293 294 template<> 295 BX_SIMD_FORCE_INLINE simd128_ref_t simd_ftoi(simd128_ref_t _a) 296 { 297 simd128_ref_t result; 298 result.ixyzw[0] = (int)_a.fxyzw[0]; 299 result.ixyzw[1] = (int)_a.fxyzw[1]; 300 
result.ixyzw[2] = (int)_a.fxyzw[2]; 301 result.ixyzw[3] = (int)_a.fxyzw[3]; 302 return result; 303 } 304 305 template<> 306 BX_SIMD_FORCE_INLINE simd128_ref_t simd_round(simd128_ref_t _a) 307 { 308 return simd_round_ni(_a); 309 } 310 311 template<> 312 BX_SIMD_FORCE_INLINE simd128_ref_t simd_add(simd128_ref_t _a, simd128_ref_t _b) 313 { 314 simd128_ref_t result; 315 result.fxyzw[0] = _a.fxyzw[0] + _b.fxyzw[0]; 316 result.fxyzw[1] = _a.fxyzw[1] + _b.fxyzw[1]; 317 result.fxyzw[2] = _a.fxyzw[2] + _b.fxyzw[2]; 318 result.fxyzw[3] = _a.fxyzw[3] + _b.fxyzw[3]; 319 return result; 320 } 321 322 template<> 323 BX_SIMD_FORCE_INLINE simd128_ref_t simd_sub(simd128_ref_t _a, simd128_ref_t _b) 324 { 325 simd128_ref_t result; 326 result.fxyzw[0] = _a.fxyzw[0] - _b.fxyzw[0]; 327 result.fxyzw[1] = _a.fxyzw[1] - _b.fxyzw[1]; 328 result.fxyzw[2] = _a.fxyzw[2] - _b.fxyzw[2]; 329 result.fxyzw[3] = _a.fxyzw[3] - _b.fxyzw[3]; 330 return result; 331 } 332 333 template<> 334 BX_SIMD_FORCE_INLINE simd128_ref_t simd_mul(simd128_ref_t _a, simd128_ref_t _b) 335 { 336 simd128_ref_t result; 337 result.fxyzw[0] = _a.fxyzw[0] * _b.fxyzw[0]; 338 result.fxyzw[1] = _a.fxyzw[1] * _b.fxyzw[1]; 339 result.fxyzw[2] = _a.fxyzw[2] * _b.fxyzw[2]; 340 result.fxyzw[3] = _a.fxyzw[3] * _b.fxyzw[3]; 341 return result; 342 } 343 344 template<> 345 BX_SIMD_FORCE_INLINE simd128_ref_t simd_div(simd128_ref_t _a, simd128_ref_t _b) 346 { 347 simd128_ref_t result; 348 result.fxyzw[0] = _a.fxyzw[0] / _b.fxyzw[0]; 349 result.fxyzw[1] = _a.fxyzw[1] / _b.fxyzw[1]; 350 result.fxyzw[2] = _a.fxyzw[2] / _b.fxyzw[2]; 351 result.fxyzw[3] = _a.fxyzw[3] / _b.fxyzw[3]; 352 return result; 353 } 354 355 template<> 356 BX_SIMD_FORCE_INLINE simd128_ref_t simd_rcp_est(simd128_ref_t _a) 357 { 358 simd128_ref_t result; 359 result.fxyzw[0] = 1.0f / _a.fxyzw[0]; 360 result.fxyzw[1] = 1.0f / _a.fxyzw[1]; 361 result.fxyzw[2] = 1.0f / _a.fxyzw[2]; 362 result.fxyzw[3] = 1.0f / _a.fxyzw[3]; 363 return result; 364 } 365 366 template<> 367 
BX_SIMD_FORCE_INLINE simd128_ref_t simd_sqrt(simd128_ref_t _a) 368 { 369 simd128_ref_t result; 370 result.fxyzw[0] = sqrt(_a.fxyzw[0]); 371 result.fxyzw[1] = sqrt(_a.fxyzw[1]); 372 result.fxyzw[2] = sqrt(_a.fxyzw[2]); 373 result.fxyzw[3] = sqrt(_a.fxyzw[3]); 374 return result; 375 } 376 377 template<> 378 BX_SIMD_FORCE_INLINE simd128_ref_t simd_rsqrt_est(simd128_ref_t _a) 379 { 380 simd128_ref_t result; 381 result.fxyzw[0] = rsqrt(_a.fxyzw[0]); 382 result.fxyzw[1] = rsqrt(_a.fxyzw[1]); 383 result.fxyzw[2] = rsqrt(_a.fxyzw[2]); 384 result.fxyzw[3] = rsqrt(_a.fxyzw[3]); 385 return result; 386 } 387 388 template<> 389 BX_SIMD_FORCE_INLINE simd128_ref_t simd_cmpeq(simd128_ref_t _a, simd128_ref_t _b) 390 { 391 simd128_ref_t result; 392 result.ixyzw[0] = _a.fxyzw[0] == _b.fxyzw[0] ? 0xffffffff : 0x0; 393 result.ixyzw[1] = _a.fxyzw[1] == _b.fxyzw[1] ? 0xffffffff : 0x0; 394 result.ixyzw[2] = _a.fxyzw[2] == _b.fxyzw[2] ? 0xffffffff : 0x0; 395 result.ixyzw[3] = _a.fxyzw[3] == _b.fxyzw[3] ? 0xffffffff : 0x0; 396 return result; 397 } 398 399 template<> 400 BX_SIMD_FORCE_INLINE simd128_ref_t simd_cmplt(simd128_ref_t _a, simd128_ref_t _b) 401 { 402 simd128_ref_t result; 403 result.ixyzw[0] = _a.fxyzw[0] < _b.fxyzw[0] ? 0xffffffff : 0x0; 404 result.ixyzw[1] = _a.fxyzw[1] < _b.fxyzw[1] ? 0xffffffff : 0x0; 405 result.ixyzw[2] = _a.fxyzw[2] < _b.fxyzw[2] ? 0xffffffff : 0x0; 406 result.ixyzw[3] = _a.fxyzw[3] < _b.fxyzw[3] ? 0xffffffff : 0x0; 407 return result; 408 } 409 410 template<> 411 BX_SIMD_FORCE_INLINE simd128_ref_t simd_cmple(simd128_ref_t _a, simd128_ref_t _b) 412 { 413 simd128_ref_t result; 414 result.ixyzw[0] = _a.fxyzw[0] <= _b.fxyzw[0] ? 0xffffffff : 0x0; 415 result.ixyzw[1] = _a.fxyzw[1] <= _b.fxyzw[1] ? 0xffffffff : 0x0; 416 result.ixyzw[2] = _a.fxyzw[2] <= _b.fxyzw[2] ? 0xffffffff : 0x0; 417 result.ixyzw[3] = _a.fxyzw[3] <= _b.fxyzw[3] ? 
0xffffffff : 0x0; 418 return result; 419 } 420 421 template<> 422 BX_SIMD_FORCE_INLINE simd128_ref_t simd_cmpgt(simd128_ref_t _a, simd128_ref_t _b) 423 { 424 simd128_ref_t result; 425 result.ixyzw[0] = _a.fxyzw[0] > _b.fxyzw[0] ? 0xffffffff : 0x0; 426 result.ixyzw[1] = _a.fxyzw[1] > _b.fxyzw[1] ? 0xffffffff : 0x0; 427 result.ixyzw[2] = _a.fxyzw[2] > _b.fxyzw[2] ? 0xffffffff : 0x0; 428 result.ixyzw[3] = _a.fxyzw[3] > _b.fxyzw[3] ? 0xffffffff : 0x0; 429 return result; 430 } 431 432 template<> 433 BX_SIMD_FORCE_INLINE simd128_ref_t simd_cmpge(simd128_ref_t _a, simd128_ref_t _b) 434 { 435 simd128_ref_t result; 436 result.ixyzw[0] = _a.fxyzw[0] >= _b.fxyzw[0] ? 0xffffffff : 0x0; 437 result.ixyzw[1] = _a.fxyzw[1] >= _b.fxyzw[1] ? 0xffffffff : 0x0; 438 result.ixyzw[2] = _a.fxyzw[2] >= _b.fxyzw[2] ? 0xffffffff : 0x0; 439 result.ixyzw[3] = _a.fxyzw[3] >= _b.fxyzw[3] ? 0xffffffff : 0x0; 440 return result; 441 } 442 443 template<> 444 BX_SIMD_FORCE_INLINE simd128_ref_t simd_min(simd128_ref_t _a, simd128_ref_t _b) 445 { 446 simd128_ref_t result; 447 result.fxyzw[0] = _a.fxyzw[0] < _b.fxyzw[0] ? _a.fxyzw[0] : _b.fxyzw[0]; 448 result.fxyzw[1] = _a.fxyzw[1] < _b.fxyzw[1] ? _a.fxyzw[1] : _b.fxyzw[1]; 449 result.fxyzw[2] = _a.fxyzw[2] < _b.fxyzw[2] ? _a.fxyzw[2] : _b.fxyzw[2]; 450 result.fxyzw[3] = _a.fxyzw[3] < _b.fxyzw[3] ? _a.fxyzw[3] : _b.fxyzw[3]; 451 return result; 452 } 453 454 template<> 455 BX_SIMD_FORCE_INLINE simd128_ref_t simd_max(simd128_ref_t _a, simd128_ref_t _b) 456 { 457 simd128_ref_t result; 458 result.fxyzw[0] = _a.fxyzw[0] > _b.fxyzw[0] ? _a.fxyzw[0] : _b.fxyzw[0]; 459 result.fxyzw[1] = _a.fxyzw[1] > _b.fxyzw[1] ? _a.fxyzw[1] : _b.fxyzw[1]; 460 result.fxyzw[2] = _a.fxyzw[2] > _b.fxyzw[2] ? _a.fxyzw[2] : _b.fxyzw[2]; 461 result.fxyzw[3] = _a.fxyzw[3] > _b.fxyzw[3] ? 
_a.fxyzw[3] : _b.fxyzw[3]; 462 return result; 463 } 464 465 template<> 466 BX_SIMD_FORCE_INLINE simd128_ref_t simd_and(simd128_ref_t _a, simd128_ref_t _b) 467 { 468 simd128_ref_t result; 469 result.uxyzw[0] = _a.uxyzw[0] & _b.uxyzw[0]; 470 result.uxyzw[1] = _a.uxyzw[1] & _b.uxyzw[1]; 471 result.uxyzw[2] = _a.uxyzw[2] & _b.uxyzw[2]; 472 result.uxyzw[3] = _a.uxyzw[3] & _b.uxyzw[3]; 473 return result; 474 } 475 476 template<> 477 BX_SIMD_FORCE_INLINE simd128_ref_t simd_andc(simd128_ref_t _a, simd128_ref_t _b) 478 { 479 simd128_ref_t result; 480 result.uxyzw[0] = _a.uxyzw[0] & ~_b.uxyzw[0]; 481 result.uxyzw[1] = _a.uxyzw[1] & ~_b.uxyzw[1]; 482 result.uxyzw[2] = _a.uxyzw[2] & ~_b.uxyzw[2]; 483 result.uxyzw[3] = _a.uxyzw[3] & ~_b.uxyzw[3]; 484 return result; 485 } 486 487 template<> 488 BX_SIMD_FORCE_INLINE simd128_ref_t simd_or(simd128_ref_t _a, simd128_ref_t _b) 489 { 490 simd128_ref_t result; 491 result.uxyzw[0] = _a.uxyzw[0] | _b.uxyzw[0]; 492 result.uxyzw[1] = _a.uxyzw[1] | _b.uxyzw[1]; 493 result.uxyzw[2] = _a.uxyzw[2] | _b.uxyzw[2]; 494 result.uxyzw[3] = _a.uxyzw[3] | _b.uxyzw[3]; 495 return result; 496 } 497 498 template<> 499 BX_SIMD_FORCE_INLINE simd128_ref_t simd_xor(simd128_ref_t _a, simd128_ref_t _b) 500 { 501 simd128_ref_t result; 502 result.uxyzw[0] = _a.uxyzw[0] ^ _b.uxyzw[0]; 503 result.uxyzw[1] = _a.uxyzw[1] ^ _b.uxyzw[1]; 504 result.uxyzw[2] = _a.uxyzw[2] ^ _b.uxyzw[2]; 505 result.uxyzw[3] = _a.uxyzw[3] ^ _b.uxyzw[3]; 506 return result; 507 } 508 509 template<> 510 BX_SIMD_FORCE_INLINE simd128_ref_t simd_sll(simd128_ref_t _a, int _count) 511 { 512 simd128_ref_t result; 513 result.uxyzw[0] = _a.uxyzw[0] << _count; 514 result.uxyzw[1] = _a.uxyzw[1] << _count; 515 result.uxyzw[2] = _a.uxyzw[2] << _count; 516 result.uxyzw[3] = _a.uxyzw[3] << _count; 517 return result; 518 } 519 520 template<> 521 BX_SIMD_FORCE_INLINE simd128_ref_t simd_srl(simd128_ref_t _a, int _count) 522 { 523 simd128_ref_t result; 524 result.uxyzw[0] = _a.uxyzw[0] >> _count; 525 
result.uxyzw[1] = _a.uxyzw[1] >> _count; 526 result.uxyzw[2] = _a.uxyzw[2] >> _count; 527 result.uxyzw[3] = _a.uxyzw[3] >> _count; 528 return result; 529 } 530 531 template<> 532 BX_SIMD_FORCE_INLINE simd128_ref_t simd_sra(simd128_ref_t _a, int _count) 533 { 534 simd128_ref_t result; 535 result.ixyzw[0] = _a.ixyzw[0] >> _count; 536 result.ixyzw[1] = _a.ixyzw[1] >> _count; 537 result.ixyzw[2] = _a.ixyzw[2] >> _count; 538 result.ixyzw[3] = _a.ixyzw[3] >> _count; 539 return result; 540 } 541 542 template<> 543 BX_SIMD_FORCE_INLINE simd128_ref_t simd_icmpeq(simd128_ref_t _a, simd128_ref_t _b) 544 { 545 simd128_ref_t result; 546 result.ixyzw[0] = _a.ixyzw[0] == _b.ixyzw[0] ? 0xffffffff : 0x0; 547 result.ixyzw[1] = _a.ixyzw[1] == _b.ixyzw[1] ? 0xffffffff : 0x0; 548 result.ixyzw[2] = _a.ixyzw[2] == _b.ixyzw[2] ? 0xffffffff : 0x0; 549 result.ixyzw[3] = _a.ixyzw[3] == _b.ixyzw[3] ? 0xffffffff : 0x0; 550 return result; 551 } 552 553 template<> 554 BX_SIMD_FORCE_INLINE simd128_ref_t simd_icmplt(simd128_ref_t _a, simd128_ref_t _b) 555 { 556 simd128_ref_t result; 557 result.ixyzw[0] = _a.ixyzw[0] < _b.ixyzw[0] ? 0xffffffff : 0x0; 558 result.ixyzw[1] = _a.ixyzw[1] < _b.ixyzw[1] ? 0xffffffff : 0x0; 559 result.ixyzw[2] = _a.ixyzw[2] < _b.ixyzw[2] ? 0xffffffff : 0x0; 560 result.ixyzw[3] = _a.ixyzw[3] < _b.ixyzw[3] ? 0xffffffff : 0x0; 561 return result; 562 } 563 564 template<> 565 BX_SIMD_FORCE_INLINE simd128_ref_t simd_icmpgt(simd128_ref_t _a, simd128_ref_t _b) 566 { 567 simd128_ref_t result; 568 result.ixyzw[0] = _a.ixyzw[0] > _b.ixyzw[0] ? 0xffffffff : 0x0; 569 result.ixyzw[1] = _a.ixyzw[1] > _b.ixyzw[1] ? 0xffffffff : 0x0; 570 result.ixyzw[2] = _a.ixyzw[2] > _b.ixyzw[2] ? 0xffffffff : 0x0; 571 result.ixyzw[3] = _a.ixyzw[3] > _b.ixyzw[3] ? 0xffffffff : 0x0; 572 return result; 573 } 574 575 template<> 576 BX_SIMD_FORCE_INLINE simd128_ref_t simd_imin(simd128_ref_t _a, simd128_ref_t _b) 577 { 578 simd128_ref_t result; 579 result.ixyzw[0] = _a.ixyzw[0] < _b.ixyzw[0] ? 
_a.ixyzw[0] : _b.ixyzw[0]; 580 result.ixyzw[1] = _a.ixyzw[1] < _b.ixyzw[1] ? _a.ixyzw[1] : _b.ixyzw[1]; 581 result.ixyzw[2] = _a.ixyzw[2] < _b.ixyzw[2] ? _a.ixyzw[2] : _b.ixyzw[2]; 582 result.ixyzw[3] = _a.ixyzw[3] < _b.ixyzw[3] ? _a.ixyzw[3] : _b.ixyzw[3]; 583 return result; 584 } 585 586 template<> 587 BX_SIMD_FORCE_INLINE simd128_ref_t simd_imax(simd128_ref_t _a, simd128_ref_t _b) 588 { 589 simd128_ref_t result; 590 result.ixyzw[0] = _a.ixyzw[0] > _b.ixyzw[0] ? _a.ixyzw[0] : _b.ixyzw[0]; 591 result.ixyzw[1] = _a.ixyzw[1] > _b.ixyzw[1] ? _a.ixyzw[1] : _b.ixyzw[1]; 592 result.ixyzw[2] = _a.ixyzw[2] > _b.ixyzw[2] ? _a.ixyzw[2] : _b.ixyzw[2]; 593 result.ixyzw[3] = _a.ixyzw[3] > _b.ixyzw[3] ? _a.ixyzw[3] : _b.ixyzw[3]; 594 return result; 595 } 596 597 template<> 598 BX_SIMD_FORCE_INLINE simd128_ref_t simd_iadd(simd128_ref_t _a, simd128_ref_t _b) 599 { 600 simd128_ref_t result; 601 result.ixyzw[0] = _a.ixyzw[0] + _b.ixyzw[0]; 602 result.ixyzw[1] = _a.ixyzw[1] + _b.ixyzw[1]; 603 result.ixyzw[2] = _a.ixyzw[2] + _b.ixyzw[2]; 604 result.ixyzw[3] = _a.ixyzw[3] + _b.ixyzw[3]; 605 return result; 606 } 607 608 template<> 609 BX_SIMD_FORCE_INLINE simd128_ref_t simd_isub(simd128_ref_t _a, simd128_ref_t _b) 610 { 611 simd128_ref_t result; 612 result.ixyzw[0] = _a.ixyzw[0] - _b.ixyzw[0]; 613 result.ixyzw[1] = _a.ixyzw[1] - _b.ixyzw[1]; 614 result.ixyzw[2] = _a.ixyzw[2] - _b.ixyzw[2]; 615 result.ixyzw[3] = _a.ixyzw[3] - _b.ixyzw[3]; 616 return result; 617 } 618 619 BX_SIMD_FORCE_INLINE simd128_t simd_zero() 620 { 621 return simd_zero<simd128_t>(); 622 } 623 624 BX_SIMD_FORCE_INLINE simd128_t simd_ld(const void* _ptr) 625 { 626 return simd_ld<simd128_t>(_ptr); 627 } 628 629 BX_SIMD_FORCE_INLINE simd128_t simd_ld(float _x, float _y, float _z, float _w) 630 { 631 return simd_ld<simd128_t>(_x, _y, _z, _w); 632 } 633 634 BX_SIMD_FORCE_INLINE simd128_t simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w) 635 { 636 return simd_ild<simd128_t>(_x, _y, _z, _w); 637 } 638 639 
BX_SIMD_FORCE_INLINE simd128_t simd_splat(const void* _ptr) 640 { 641 return simd_splat<simd128_t>(_ptr); 642 } 643 644 BX_SIMD_FORCE_INLINE simd128_t simd_splat(float _a) 645 { 646 return simd_splat<simd128_t>(_a); 647 } 648 649 BX_SIMD_FORCE_INLINE simd128_t simd_isplat(uint32_t _a) 650 { 651 return simd_isplat<simd128_t>(_a); 652 } 653 654 template<> 655 BX_SIMD_FORCE_INLINE simd128_ref_t simd_shuf_xAzC(simd128_ref_t _a, simd128_ref_t _b) 656 { 657 return simd_shuf_xAzC_ni(_a, _b); 658 } 659 660 template<> 661 BX_SIMD_FORCE_INLINE simd128_ref_t simd_shuf_yBwD(simd128_ref_t _a, simd128_ref_t _b) 662 { 663 return simd_shuf_yBwD_ni(_a, _b); 664 } 665 666 template<> 667 BX_SIMD_FORCE_INLINE simd128_ref_t simd_rcp(simd128_ref_t _a) 668 { 669 return simd_rcp_ni(_a); 670 } 671 672 template<> 673 BX_SIMD_FORCE_INLINE simd128_ref_t simd_orx(simd128_ref_t _a) 674 { 675 return simd_orx_ni(_a); 676 } 677 678 template<> 679 BX_SIMD_FORCE_INLINE simd128_ref_t simd_orc(simd128_ref_t _a, simd128_ref_t _b) 680 { 681 return simd_orc_ni(_a, _b); 682 } 683 684 template<> 685 BX_SIMD_FORCE_INLINE simd128_ref_t simd_neg(simd128_ref_t _a) 686 { 687 return simd_neg_ni(_a); 688 } 689 690 template<> 691 BX_SIMD_FORCE_INLINE simd128_ref_t simd_madd(simd128_ref_t _a, simd128_ref_t _b, simd128_ref_t _c) 692 { 693 return simd_madd_ni(_a, _b, _c); 694 } 695 696 template<> 697 BX_SIMD_FORCE_INLINE simd128_ref_t simd_nmsub(simd128_ref_t _a, simd128_ref_t _b, simd128_ref_t _c) 698 { 699 return simd_nmsub_ni(_a, _b, _c); 700 } 701 702 template<> 703 BX_SIMD_FORCE_INLINE simd128_ref_t simd_div_nr(simd128_ref_t _a, simd128_ref_t _b) 704 { 705 return simd_div_nr_ni(_a, _b); 706 } 707 708 template<> 709 BX_SIMD_FORCE_INLINE simd128_ref_t simd_selb(simd128_ref_t _mask, simd128_ref_t _a, simd128_ref_t _b) 710 { 711 return simd_selb_ni(_mask, _a, _b); 712 } 713 714 template<> 715 BX_SIMD_FORCE_INLINE simd128_ref_t simd_sels(simd128_ref_t _test, simd128_ref_t _a, simd128_ref_t _b) 716 { 717 return 
simd_sels_ni(_test, _a, _b); 718 } 719 720 template<> 721 BX_SIMD_FORCE_INLINE simd128_ref_t simd_not(simd128_ref_t _a) 722 { 723 return simd_not_ni(_a); 724 } 725 726 template<> 727 BX_SIMD_FORCE_INLINE simd128_ref_t simd_abs(simd128_ref_t _a) 728 { 729 return simd_abs_ni(_a); 730 } 731 732 template<> 733 BX_SIMD_FORCE_INLINE simd128_ref_t simd_clamp(simd128_ref_t _a, simd128_ref_t _min, simd128_ref_t _max) 734 { 735 return simd_clamp_ni(_a, _min, _max); 736 } 737 738 template<> 739 BX_SIMD_FORCE_INLINE simd128_ref_t simd_lerp(simd128_ref_t _a, simd128_ref_t _b, simd128_ref_t _s) 740 { 741 return simd_lerp_ni(_a, _b, _s); 742 } 743 744 template<> 745 BX_SIMD_FORCE_INLINE simd128_ref_t simd_rsqrt(simd128_ref_t _a) 746 { 747 return simd_rsqrt_ni(_a); 748 } 749 750 template<> 751 BX_SIMD_FORCE_INLINE simd128_ref_t simd_rsqrt_nr(simd128_ref_t _a) 752 { 753 return simd_rsqrt_nr_ni(_a); 754 } 755 756 template<> 757 BX_SIMD_FORCE_INLINE simd128_ref_t simd_rsqrt_carmack(simd128_ref_t _a) 758 { 759 return simd_rsqrt_carmack_ni(_a); 760 } 761 762 template<> 763 BX_SIMD_FORCE_INLINE simd128_ref_t simd_sqrt_nr(simd128_ref_t _a) 764 { 765 return simd_sqrt_nr_ni(_a); 766 } 767 768 template<> 769 BX_SIMD_FORCE_INLINE simd128_ref_t simd_log2(simd128_ref_t _a) 770 { 771 return simd_log2_ni(_a); 772 } 773 774 template<> 775 BX_SIMD_FORCE_INLINE simd128_ref_t simd_exp2(simd128_ref_t _a) 776 { 777 return simd_exp2_ni(_a); 778 } 779 780 template<> 781 BX_SIMD_FORCE_INLINE simd128_ref_t simd_pow(simd128_ref_t _a, simd128_ref_t _b) 782 { 783 return simd_pow_ni(_a, _b); 784 } 785 786 template<> 787 BX_SIMD_FORCE_INLINE simd128_ref_t simd_cross3(simd128_ref_t _a, simd128_ref_t _b) 788 { 789 return simd_cross3_ni(_a, _b); 790 } 791 792 template<> 793 BX_SIMD_FORCE_INLINE simd128_ref_t simd_normalize3(simd128_ref_t _a) 794 { 795 return simd_normalize3_ni(_a); 796 } 797 798 template<> 799 BX_SIMD_FORCE_INLINE simd128_ref_t simd_dot3(simd128_ref_t _a, simd128_ref_t _b) 800 { 801 return 
simd_dot3_ni(_a, _b); 802 } 803 804 template<> 805 BX_SIMD_FORCE_INLINE simd128_ref_t simd_dot(simd128_ref_t _a, simd128_ref_t _b) 806 { 807 return simd_dot_ni(_a, _b); 808 } 809 810 template<> 811 BX_SIMD_FORCE_INLINE simd128_ref_t simd_ceil(simd128_ref_t _a) 812 { 813 return simd_ceil_ni(_a); 814 } 815 816 template<> 817 BX_SIMD_FORCE_INLINE simd128_ref_t simd_floor(simd128_ref_t _a) 818 { 819 return simd_floor_ni(_a); 820 } 821 822} // namespace bx 823