/*++

Copyright (c) Microsoft Corporation. All rights reserved.

Module Name:

    xnamathvector.inl

Abstract:

    XNA math library for Windows and Xbox 360: Vector functions
--*/

#if defined(_MSC_VER) && (_MSC_VER > 1000)
#pragma once
#endif

#ifndef __XNAMATHVECTOR_INL__
#define __XNAMATHVECTOR_INL__

#if defined(_XM_NO_INTRINSICS_)
#define XMISNAN(x)  ((*(UINT*)&(x) & 0x7F800000) == 0x7F800000 && (*(UINT*)&(x) & 0x7FFFFF) != 0)
#define XMISINF(x)  ((*(UINT*)&(x) & 0x7FFFFFFF) == 0x7F800000)
#endif

/****************************************************************************
 *
 * General Vector
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
// Assignment operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Return a vector with all elements equaling zero
XMFINLINE XMVECTOR XMVectorZero()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {0.0f,0.0f,0.0f,0.0f};
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_setzero_ps();
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with four floating point values
XMFINLINE XMVECTOR XMVectorSet
(
    FLOAT x,
    FLOAT y,
    FLOAT z,
    FLOAT w
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORF32 vResult = {x,y,z,w};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_set_ps( w, z, y, x );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with four integer values
XMFINLINE XMVECTOR XMVectorSetInt
(
    UINT x,
    UINT y,
    UINT z,
    UINT w
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORU32 vResult = {x,y,z,w};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_set_epi32( w, z, y, x );
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with a replicated floating point value
XMFINLINE XMVECTOR XMVectorReplicate
(
    FLOAT Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    XMVECTORF32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_set_ps1( Value );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with a replicated floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorReplicatePtr
(
    CONST FLOAT *pValue
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    FLOAT Value = pValue[0];
    XMVECTORF32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_load_ps1( pValue );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
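
// Illustrative usage (editor's sketch, not part of the original header).
// vUp and vHalf are placeholder names; the calls are the initializers above:
//
//     XMVECTOR vUp   = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f); // { 0, 1, 0, 0 }
//     XMVECTOR vHalf = XMVectorReplicate(0.5f);             // { 0.5, 0.5, 0.5, 0.5 }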

//------------------------------------------------------------------------------
// Initialize a vector with a replicated integer value
XMFINLINE XMVECTOR XMVectorReplicateInt
(
    UINT Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    XMVECTORU32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_set1_epi32( Value );
    return reinterpret_cast<const __m128 *>(&vTemp)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with a replicated integer value passed by pointer
XMFINLINE XMVECTOR XMVectorReplicateIntPtr
(
    CONST UINT *pValue
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    UINT Value = pValue[0];
    XMVECTORU32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_load_ps1(reinterpret_cast<const float *>(pValue));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with all bits set (true mask)
XMFINLINE XMVECTOR XMVectorTrueInt()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORU32 vResult = {0xFFFFFFFFU,0xFFFFFFFFU,0xFFFFFFFFU,0xFFFFFFFFU};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_set1_epi32(-1);
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with all bits clear (false mask)
XMFINLINE XMVECTOR XMVectorFalseInt()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {0.0f,0.0f,0.0f,0.0f};
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_setzero_ps();
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Replicate the x component of the vector
XMFINLINE XMVECTOR XMVectorSplatX
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[0];
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(0, 0, 0, 0) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Replicate the y component of the vector
XMFINLINE XMVECTOR XMVectorSplatY
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[1];
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(1, 1, 1, 1) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Replicate the z component of the vector
XMFINLINE XMVECTOR XMVectorSplatZ
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[2];
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(2, 2, 2, 2) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Replicate the w component of the vector
XMFINLINE XMVECTOR XMVectorSplatW
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[3];
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(3, 3, 3, 3) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return a vector of 1.0f,1.0f,1.0f,1.0f
XMFINLINE XMVECTOR XMVectorSplatOne()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = 1.0f;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMOne;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return a vector of INF,INF,INF,INF
XMFINLINE XMVECTOR XMVectorSplatInfinity()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x7F800000;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMInfinity;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return a vector of Q_NAN,Q_NAN,Q_NAN,Q_NAN
XMFINLINE XMVECTOR XMVectorSplatQNaN()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x7FC00000;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMQNaN;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return a vector of 1.192092896e-7f,1.192092896e-7f,1.192092896e-7f,1.192092896e-7f
XMFINLINE XMVECTOR XMVectorSplatEpsilon()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x34000000;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMEpsilon;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return a vector of -0.0f (0x80000000),-0.0f,-0.0f,-0.0f
XMFINLINE XMVECTOR XMVectorSplatSignMask()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x80000000U;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_set1_epi32( 0x80000000 );
    return reinterpret_cast<__m128*>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
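
// Editor's note (illustrative, not in the original header): the sign mask
// enables branchless negation by toggling each component's sign bit. This
// sketch assumes XMVectorXorInt, declared elsewhere in the library:
//
//     XMVECTOR vNegV = XMVectorXorInt(V, XMVectorSplatSignMask()); // -V per component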

//------------------------------------------------------------------------------
// Return a floating point value via an index. This is not a recommended
// function to use due to performance loss.
XMFINLINE FLOAT XMVectorGetByIndex(FXMVECTOR V,UINT i)
{
    XMASSERT( i <= 3 );
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[i];
#elif defined(_XM_SSE_INTRINSICS_)
    return V.m128_f32[i];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return the X component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetX(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[0];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    return _mm_cvtss_f32(V);
#else
    return V.m128_f32[0];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the Y component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetY(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[1];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    return _mm_cvtss_f32(vTemp);
#else
    return V.m128_f32[1];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the Z component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetZ(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[2];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    return _mm_cvtss_f32(vTemp);
#else
    return V.m128_f32[2];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the W component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetW(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[3];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    return _mm_cvtss_f32(vTemp);
#else
    return V.m128_f32[3];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Store a component indexed by i into a 32 bit float location in memory.
// This causes Load/Hit/Store on VMX targets
XMFINLINE VOID XMVectorGetByIndexPtr(FLOAT *f,FXMVECTOR V,UINT i)
{
    XMASSERT( f != 0 );
    XMASSERT( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    *f = V.vector4_f32[i];
#elif defined(_XM_SSE_INTRINSICS_)
    *f = V.m128_f32[i];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
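
// Editor's note (illustrative, not in the original header): prefer the named
// accessors when the component is known at compile time; the indexed form
// typically forces the vector out of its register to the stack on SSE:
//
//     FLOAT fX = XMVectorGetX(V);          // stays in a register (VS2008+ SSE path)
//     FLOAT fY = XMVectorGetByIndex(V, 1); // reads through memory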

//------------------------------------------------------------------------------

// Store the X component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetXPtr(FLOAT *x,FXMVECTOR V)
{
    XMASSERT( x != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *x = V.vector4_f32[0];
#elif defined(_XM_SSE_INTRINSICS_)
    _mm_store_ss(x,V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the Y component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetYPtr(FLOAT *y,FXMVECTOR V)
{
    XMASSERT( y != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *y = V.vector4_f32[1];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    _mm_store_ss(y,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the Z component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetZPtr(FLOAT *z,FXMVECTOR V)
{
    XMASSERT( z != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *z = V.vector4_f32[2];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    _mm_store_ss(z,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the W component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetWPtr(FLOAT *w,FXMVECTOR V)
{
    XMASSERT( w != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *w = V.vector4_f32[3];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    _mm_store_ss(w,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Return an integer value via an index. This is not a recommended
// function to use due to performance loss.
XMFINLINE UINT XMVectorGetIntByIndex(FXMVECTOR V, UINT i)
{
    XMASSERT( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[i];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER<1400)
    XMVECTORU32 tmp;
    tmp.v = V;
    return tmp.u[i];
#else
    return V.m128_u32[i];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Return the X component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntX(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[0];
#elif defined(_XM_SSE_INTRINSICS_)
    return static_cast<UINT>(_mm_cvtsi128_si32(reinterpret_cast<const __m128i *>(&V)[0]));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the Y component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntY(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[1];
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(1,1,1,1));
    return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the Z component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntZ(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[2];
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(2,2,2,2));
    return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the W component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntW(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[3];
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(3,3,3,3));
    return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Store a component indexed by i into a 32 bit integer location in memory.
// This causes Load/Hit/Store on VMX targets
XMFINLINE VOID XMVectorGetIntByIndexPtr(UINT *x,FXMVECTOR V,UINT i)
{
    XMASSERT( x != 0 );
    XMASSERT( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    *x = V.vector4_u32[i];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER<1400)
    XMVECTORU32 tmp;
    tmp.v = V;
    *x = tmp.u[i];
#else
    *x = V.m128_u32[i];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Store the X component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntXPtr(UINT *x,FXMVECTOR V)
{
    XMASSERT( x != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *x = V.vector4_u32[0];
#elif defined(_XM_SSE_INTRINSICS_)
    _mm_store_ss(reinterpret_cast<float *>(x),V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the Y component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntYPtr(UINT *y,FXMVECTOR V)
{
    XMASSERT( y != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *y = V.vector4_u32[1];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    _mm_store_ss(reinterpret_cast<float *>(y),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the Z component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntZPtr(UINT *z,FXMVECTOR V)
{
    XMASSERT( z != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *z = V.vector4_u32[2];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    _mm_store_ss(reinterpret_cast<float *>(z),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the W component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntWPtr(UINT *w,FXMVECTOR V)
{
    XMASSERT( w != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *w = V.vector4_u32[3];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    _mm_store_ss(reinterpret_cast<float *>(w),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Set a single indexed floating point component
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetByIndex(FXMVECTOR V, FLOAT f,UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( i <= 3 );
    U = V;
    U.vector4_f32[i] = f;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( i <= 3 );
    XMVECTOR U = V;
    U.m128_f32[i] = f;
    return U;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets the X component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetX(FXMVECTOR V, FLOAT x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = x;
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_f32[0] = x;
    return vResult;
#else
    XMVECTOR vResult = _mm_set_ss(x);
    vResult = _mm_move_ss(V,vResult);
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Y component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetY(FXMVECTOR V, FLOAT y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = y;
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_f32[1] = y;
    return vResult;
#else
    // Swap y and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    XMVECTOR vTemp = _mm_set_ss(y);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap y and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Z component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetZ(FXMVECTOR V, FLOAT z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = z;
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_f32[2] = z;
    return vResult;
#else
    // Swap z and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    XMVECTOR vTemp = _mm_set_ss(z);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap z and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the W component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetW(FXMVECTOR V, FLOAT w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = w;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_f32[3] = w;
    return vResult;
#else
    // Swap w and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    XMVECTOR vTemp = _mm_set_ss(w);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap w and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets a component of a vector to a floating point value passed by pointer
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetByIndexPtr(FXMVECTOR V,CONST FLOAT *f,UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( f != 0 );
    XMASSERT( i <= 3 );
    U = V;
    U.vector4_f32[i] = *f;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( f != 0 );
    XMASSERT( i <= 3 );
    XMVECTOR U = V;
    U.m128_f32[i] = *f;
    return U;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets the X component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetXPtr(FXMVECTOR V,CONST FLOAT *x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( x != 0 );
    U.vector4_f32[0] = *x;
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( x != 0 );
    XMVECTOR vResult = _mm_load_ss(x);
    vResult = _mm_move_ss(V,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Y component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetYPtr(FXMVECTOR V,CONST FLOAT *y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( y != 0 );
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = *y;
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( y != 0 );
    // Swap y and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(y);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap y and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
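
// Editor's note (illustrative, not in the original header): the
// shuffle/_mm_move_ss/shuffle sequences above keep the whole update in XMM
// registers, whereas the _XM_ISVS2005_ fallback writes through the m128_f32
// union and typically forces the vector out to memory and back.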

// Sets the Z component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetZPtr(FXMVECTOR V,CONST FLOAT *z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( z != 0 );
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = *z;
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( z != 0 );
    // Swap z and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(z);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap z and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the W component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetWPtr(FXMVECTOR V,CONST FLOAT *w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( w != 0 );
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = *w;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( w != 0 );
    // Swap w and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(w);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap w and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets a component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntByIndex(FXMVECTOR V, UINT x, UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( i <= 3 );
    U = V;
    U.vector4_u32[i] = x;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( i <= 3 );
    XMVECTORU32 tmp;
    tmp.v = V;
    tmp.u[i] = x;
    return tmp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets the X component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntX(FXMVECTOR V, UINT x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = x;
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_i32[0] = x;
    return vResult;
#else
    __m128i vTemp = _mm_cvtsi32_si128(x);
    XMVECTOR vResult = _mm_move_ss(V,reinterpret_cast<const __m128 *>(&vTemp)[0]);
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Y component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntY(FXMVECTOR V, UINT y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = y;
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_i32[1] = y;
    return vResult;
#else
    // Swap y and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    __m128i vTemp = _mm_cvtsi32_si128(y);
    // Replace the x component
    vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
    // Swap y and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Z component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntZ(FXMVECTOR V, UINT z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = z;
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_i32[2] = z;
    return vResult;
#else
    // Swap z and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    __m128i vTemp = _mm_cvtsi32_si128(z);
    // Replace the x component
    vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
    // Swap z and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the W component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntW(FXMVECTOR V, UINT w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = w;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_i32[3] = w;
    return vResult;
#else
    // Swap w and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    __m128i vTemp = _mm_cvtsi32_si128(w);
    // Replace the x component
    vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
    // Swap w and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets a component of a vector to an integer value passed by pointer
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntByIndexPtr(FXMVECTOR V, CONST UINT *x,UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( x != 0 );
    XMASSERT( i <= 3 );
    U = V;
    U.vector4_u32[i] = *x;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( x != 0 );
    XMASSERT( i <= 3 );
    XMVECTORU32 tmp;
    tmp.v = V;
    tmp.u[i] = *x;
    return tmp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets the X component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntXPtr(FXMVECTOR V,CONST UINT *x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( x != 0 );
    U.vector4_u32[0] = *x;
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( x != 0 );
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(x));
    XMVECTOR vResult = _mm_move_ss(V,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Y component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntYPtr(FXMVECTOR V,CONST UINT *y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( y != 0 );
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = *y;
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( y != 0 );
    // Swap y and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(y));
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap y and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Z component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntZPtr(FXMVECTOR V,CONST UINT *z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( z != 0 );
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = *z;
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( z != 0 );
    // Swap z and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(z));
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap z and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the W component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntWPtr(FXMVECTOR V,CONST UINT *w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( w != 0 );
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = *w;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( w != 0 );
    // Swap w and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(w));
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap w and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Define a control vector to be used in XMVectorPermute
// operations.  Visualize the two vectors V1 and V2 given
// in a permute as arranged back to back in a linear fashion,
// such that they form an array of 8 floating point values.
// The four integers specified in XMVectorPermuteControl
// will serve as indices into the array to select components
// from the two vectors.  ElementIndex0 is used to select
// an element from the vectors to be placed in the first
// component of the resulting vector, ElementIndex1 is used
// to select an element for the second component, etc.

XMFINLINE XMVECTOR XMVectorPermuteControl
(
    UINT ElementIndex0,
    UINT ElementIndex1,
    UINT ElementIndex2,
    UINT ElementIndex3
)
{
#if defined(_XM_SSE_INTRINSICS_) || defined(_XM_NO_INTRINSICS_)
    XMVECTORU32 vControl;
    static CONST UINT ControlElement[] = {
        XM_PERMUTE_0X,
        XM_PERMUTE_0Y,
        XM_PERMUTE_0Z,
        XM_PERMUTE_0W,
        XM_PERMUTE_1X,
        XM_PERMUTE_1Y,
        XM_PERMUTE_1Z,
        XM_PERMUTE_1W
    };
    XMASSERT(ElementIndex0 < 8);
    XMASSERT(ElementIndex1 < 8);
    XMASSERT(ElementIndex2 < 8);
    XMASSERT(ElementIndex3 < 8);

    vControl.u[0] = ControlElement[ElementIndex0];
    vControl.u[1] = ControlElement[ElementIndex1];
    vControl.u[2] = ControlElement[ElementIndex2];
    vControl.u[3] = ControlElement[ElementIndex3];
    return vControl.v;
#else
#endif
}
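
// Illustrative usage (editor's sketch, not part of the original header):
// indices 0-3 select from the first vector, 4-7 from the second, so taking
// x,y from V1 and z,w from V2 looks like this (V1 and V2 are placeholders):
//
//     XMVECTOR Control = XMVectorPermuteControl(0, 1, 6, 7);
//     XMVECTOR Result  = XMVectorPermute(V1, V2, Control);
//     // Result = { V1.x, V1.y, V2.z, V2.w }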
Index 0-15 = V1, 1226// 16-31 = V2 1227XMFINLINE XMVECTOR XMVectorPermute 1228( 1229 FXMVECTOR V1, 1230 FXMVECTOR V2, 1231 FXMVECTOR Control 1232) 1233{ 1234#if defined(_XM_NO_INTRINSICS_) 1235 const BYTE *aByte[2]; 1236 XMVECTOR Result; 1237 UINT i, uIndex, VectorIndex; 1238 const BYTE *pControl; 1239 BYTE *pWork; 1240 1241 // Indices must be in range from 0 to 31 1242 XMASSERT((Control.vector4_u32[0] & 0xE0E0E0E0) == 0); 1243 XMASSERT((Control.vector4_u32[1] & 0xE0E0E0E0) == 0); 1244 XMASSERT((Control.vector4_u32[2] & 0xE0E0E0E0) == 0); 1245 XMASSERT((Control.vector4_u32[3] & 0xE0E0E0E0) == 0); 1246 1247 // 0-15 = V1, 16-31 = V2 1248 aByte[0] = (const BYTE*)(&V1); 1249 aByte[1] = (const BYTE*)(&V2); 1250 i = 16; 1251 pControl = (const BYTE *)(&Control); 1252 pWork = (BYTE *)(&Result); 1253 do { 1254 // Get the byte to map from 1255 uIndex = pControl[0]; 1256 ++pControl; 1257 VectorIndex = (uIndex>>4)&1; 1258 uIndex &= 0x0F; 1259#if defined(_XM_LITTLEENDIAN_) 1260 uIndex ^= 3; // Swap byte ordering on little endian machines 1261#endif 1262 pWork[0] = aByte[VectorIndex][uIndex]; 1263 ++pWork; 1264 } while (--i); 1265 return Result; 1266#elif defined(_XM_SSE_INTRINSICS_) 1267#if defined(_PREFAST_) || defined(XMDEBUG) 1268 // Indices must be in range from 0 to 31 1269 static const XMVECTORI32 PremuteTest = {0xE0E0E0E0,0xE0E0E0E0,0xE0E0E0E0,0xE0E0E0E0}; 1270 XMVECTOR vAssert = _mm_and_ps(Control,PremuteTest); 1271 __m128i vAsserti = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&vAssert)[0],g_XMZero); 1272 XMASSERT(_mm_movemask_ps(*reinterpret_cast<const __m128 *>(&vAsserti)) == 0xf); 1273#endif 1274 // Store the vectors onto local memory on the stack 1275 XMVECTOR Array[2]; 1276 Array[0] = V1; 1277 Array[1] = V2; 1278 // Output vector, on the stack 1279 XMVECTORU8 vResult; 1280 // Get pointer to the two vectors on the stack 1281 const BYTE *pInput = reinterpret_cast<const BYTE *>(Array); 1282 // Store the Control vector on the stack to access the bytes 1283 // don't use Control, it can cause a register variable to spill on the stack. 1284 XMVECTORU8 vControl; 1285 vControl.v = Control; // Write to memory 1286 UINT i = 0; 1287 do { 1288 UINT ComponentIndex = vControl.u[i] & 0x1FU; 1289 ComponentIndex ^= 3; // Swap byte ordering 1290 vResult.u[i] = pInput[ComponentIndex]; 1291 } while (++i<16); 1292 return vResult; 1293#else // _XM_SSE_INTRINSICS_ 1294#endif // _XM_VMX128_INTRINSICS_ 1295} 1296 1297//------------------------------------------------------------------------------ 1298// Define a control vector to be used in XMVectorSelect 1299// operations. The four integers specified in XMVectorSelectControl 1300// serve as indices to select between components in two vectors. 1301// The first index controls selection for the first component of 1302// the vectors involved in a select operation, the second index 1303// controls selection for the second component etc. A value of 1304// zero for an index causes the corresponding component from the first 1305// vector to be selected whereas a one causes the component from the 1306// second vector to be selected instead. 

XMFINLINE XMVECTOR XMVectorSelectControl
(
    UINT VectorIndex0,
    UINT VectorIndex1,
    UINT VectorIndex2,
    UINT VectorIndex3
)
{
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    // x=Index0,y=Index1,z=Index2,w=Index3
    __m128i vTemp = _mm_set_epi32(VectorIndex3,VectorIndex2,VectorIndex1,VectorIndex0);
    // Any non-zero entries become 0xFFFFFFFF else 0
    vTemp = _mm_cmpgt_epi32(vTemp,g_XMZero);
    return reinterpret_cast<__m128 *>(&vTemp)[0];
#else
    XMVECTOR ControlVector;
    CONST UINT ControlElement[] =
    {
        XM_SELECT_0,
        XM_SELECT_1
    };

    XMASSERT(VectorIndex0 < 2);
    XMASSERT(VectorIndex1 < 2);
    XMASSERT(VectorIndex2 < 2);
    XMASSERT(VectorIndex3 < 2);

    ControlVector.vector4_u32[0] = ControlElement[VectorIndex0];
    ControlVector.vector4_u32[1] = ControlElement[VectorIndex1];
    ControlVector.vector4_u32[2] = ControlElement[VectorIndex2];
    ControlVector.vector4_u32[3] = ControlElement[VectorIndex3];

    return ControlVector;

#endif
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorSelect
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Control
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = (V1.vector4_u32[0] & ~Control.vector4_u32[0]) | (V2.vector4_u32[0] & Control.vector4_u32[0]);
    Result.vector4_u32[1] = (V1.vector4_u32[1] & ~Control.vector4_u32[1]) | (V2.vector4_u32[1] & Control.vector4_u32[1]);
    Result.vector4_u32[2] = (V1.vector4_u32[2] & ~Control.vector4_u32[2]) | (V2.vector4_u32[2] & Control.vector4_u32[2]);
    Result.vector4_u32[3] = (V1.vector4_u32[3] & ~Control.vector4_u32[3]) | (V2.vector4_u32[3] & Control.vector4_u32[3]);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp1 = _mm_andnot_ps(Control,V1);
    XMVECTOR vTemp2 = _mm_and_ps(V2,Control);
    return _mm_or_ps(vTemp1,vTemp2);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorMergeXY
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = V1.vector4_u32[0];
    Result.vector4_u32[1] = V2.vector4_u32[0];
    Result.vector4_u32[2] = V1.vector4_u32[1];
    Result.vector4_u32[3] = V2.vector4_u32[1];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_unpacklo_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorMergeZW
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = V1.vector4_u32[2];
    Result.vector4_u32[1] = V2.vector4_u32[2];
    Result.vector4_u32[2] = V1.vector4_u32[3];
    Result.vector4_u32[3] = V2.vector4_u32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_unpackhi_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
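
// Illustrative usage (editor's sketch, not part of the original header):
// a zero index selects the component from the first vector, a one from the
// second (V1 and V2 are placeholders):
//
//     XMVECTOR Control = XMVectorSelectControl(0, 1, 0, 1);
//     XMVECTOR Result  = XMVectorSelect(V1, V2, Control);
//     // Result = { V1.x, V2.y, V1.z, V2.w }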

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;

    Control.vector4_u32[0] = (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;

    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_cmpeq_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorEqualR
(
    UINT* pCR,
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ux, uy, uz, uw, CR;
    XMVECTOR Control;

    XMASSERT( pCR );

    ux = (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    uy = (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    uz = (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    uw = (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
    CR = 0;
    if (ux&uy&uz&uw)
    {
        // All elements are equal
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!(ux|uy|uz|uw))
    {
        // All elements are not equal
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    Control.vector4_u32[0] = ux;
    Control.vector4_u32[1] = uy;
    Control.vector4_u32[2] = uz;
    Control.vector4_u32[3] = uw;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( pCR );
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0xf)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        // All elements are not equal
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
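
// Illustrative usage (editor's sketch, not part of the original header):
// the "R" variants also return a CR6-style comparison record, intended to
// be tested with the XMComparison* macros from xnamath.h:
//
//     UINT CR;
//     XMVECTOR Mask = XMVectorEqualR(&CR, V1, V2);
//     if (XMComparisonAllTrue(CR))  { /* every component compared equal */ }
//     if (XMComparisonAllFalse(CR)) { /* no component compared equal */ }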

//------------------------------------------------------------------------------
// Treat the components of the vectors as unsigned integers and
// compare individual bits between the two.  This is useful for
// comparing control vectors and result vectors returned from
// other comparison operations.

XMFINLINE XMVECTOR XMVectorEqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;

    Control.vector4_u32[0] = (V1.vector4_u32[0] == V2.vector4_u32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V1.vector4_u32[1] == V2.vector4_u32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V1.vector4_u32[2] == V2.vector4_u32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V1.vector4_u32[3] == V2.vector4_u32[3]) ? 0xFFFFFFFF : 0;

    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorEqualIntR
(
    UINT* pCR,
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;

    XMASSERT(pCR);

    Control = XMVectorEqualInt(V1, V2);

    *pCR = 0;

    if (XMVector4EqualInt(Control, XMVectorTrueInt()))
    {
        // All elements are equal
        *pCR |= XM_CRMASK_CR6TRUE;
    }
    else if (XMVector4EqualInt(Control, XMVectorFalseInt()))
    {
        // All elements are not equal
        *pCR |= XM_CRMASK_CR6FALSE;
    }

    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pCR);
    __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
    int iTemp = _mm_movemask_ps(reinterpret_cast<const __m128*>(&V)[0]);
    UINT CR = 0;
    if (iTemp==0x0F)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTemp)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorNearEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)

    FLOAT fDeltax, fDeltay, fDeltaz, fDeltaw;
    XMVECTOR Control;

    fDeltax = V1.vector4_f32[0]-V2.vector4_f32[0];
    fDeltay = V1.vector4_f32[1]-V2.vector4_f32[1];
    fDeltaz = V1.vector4_f32[2]-V2.vector4_f32[2];
    fDeltaw = V1.vector4_f32[3]-V2.vector4_f32[3];

    fDeltax = fabsf(fDeltax);
    fDeltay = fabsf(fDeltay);
    fDeltaz = fabsf(fDeltaz);
    fDeltaw = fabsf(fDeltaw);

    Control.vector4_u32[0] = (fDeltax <= Epsilon.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[1] = (fDeltay <= Epsilon.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[2] = (fDeltaz <= Epsilon.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[3] = (fDeltaw <= Epsilon.vector4_f32[3]) ? 0xFFFFFFFFU : 0;

    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
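
// Illustrative usage (editor's sketch, not part of the original header):
// per-component approximate equality; the tolerance value here is arbitrary:
//
//     XMVECTOR Eps  = XMVectorReplicate(1.0e-4f);
//     XMVECTOR Near = XMVectorNearEqual(V1, V2, Eps); // 0xFFFFFFFF where |V1-V2| <= Eps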

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorNotEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V1.vector4_f32[0] != V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V1.vector4_f32[1] != V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V1.vector4_f32[2] != V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V1.vector4_f32[3] != V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_cmpneq_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorNotEqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V1.vector4_u32[0] != V2.vector4_u32[0]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[1] = (V1.vector4_u32[1] != V2.vector4_u32[1]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[2] = (V1.vector4_u32[2] != V2.vector4_u32[2]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[3] = (V1.vector4_u32[3] != V2.vector4_u32[3]) ? 0xFFFFFFFFU : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
    return _mm_xor_ps(reinterpret_cast<__m128 *>(&V)[0],g_XMNegOneMask);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorGreater
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_cmpgt_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorGreaterR
(
    UINT* pCR,
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ux, uy, uz, uw, CR;
    XMVECTOR Control;

    XMASSERT( pCR );

    ux = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    uy = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    uz = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    uw = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
    CR = 0;
    if (ux&uy&uz&uw)
    {
        // All elements are greater
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!(ux|uy|uz|uw))
    {
        // All elements are not greater
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    Control.vector4_u32[0] = ux;
    Control.vector4_u32[1] = uy;
    Control.vector4_u32[2] = uz;
    Control.vector4_u32[3] = uw;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( pCR );
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0xf)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        // All elements are not greater
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorGreaterOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_cmpge_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorGreaterOrEqualR
(
    UINT* pCR,
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ux, uy, uz, uw, CR;
    XMVECTOR Control;

    XMASSERT( pCR );

    ux = (V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    uy = (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    uz = (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    uw = (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
    CR = 0;
    if (ux&uy&uz&uw)
    {
        // All elements are greater or equal
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!(ux|uy|uz|uw))
    {
        // All elements are not greater or equal
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    Control.vector4_u32[0] = ux;
    Control.vector4_u32[1] = uy;
    Control.vector4_u32[2] = uz;
    Control.vector4_u32[3] = uw;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( pCR );
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0xf)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        // All elements are not greater or equal
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
0xFFFFFFFF : 0; 1854 Control.vector4_u32[2] = (V1.vector4_f32[2] < V2.vector4_f32[2]) ? 0xFFFFFFFF : 0; 1855 Control.vector4_u32[3] = (V1.vector4_f32[3] < V2.vector4_f32[3]) ? 0xFFFFFFFF : 0; 1856 return Control; 1857 1858#elif defined(_XM_SSE_INTRINSICS_) 1859 return _mm_cmplt_ps( V1, V2 ); 1860#else // _XM_VMX128_INTRINSICS_ 1861#endif // _XM_VMX128_INTRINSICS_ 1862} 1863 1864//------------------------------------------------------------------------------ 1865 1866XMFINLINE XMVECTOR XMVectorLessOrEqual 1867( 1868 FXMVECTOR V1, 1869 FXMVECTOR V2 1870) 1871{ 1872#if defined(_XM_NO_INTRINSICS_) 1873 1874 XMVECTOR Control; 1875 Control.vector4_u32[0] = (V1.vector4_f32[0] <= V2.vector4_f32[0]) ? 0xFFFFFFFF : 0; 1876 Control.vector4_u32[1] = (V1.vector4_f32[1] <= V2.vector4_f32[1]) ? 0xFFFFFFFF : 0; 1877 Control.vector4_u32[2] = (V1.vector4_f32[2] <= V2.vector4_f32[2]) ? 0xFFFFFFFF : 0; 1878 Control.vector4_u32[3] = (V1.vector4_f32[3] <= V2.vector4_f32[3]) ? 0xFFFFFFFF : 0; 1879 return Control; 1880 1881#elif defined(_XM_SSE_INTRINSICS_) 1882 return _mm_cmple_ps( V1, V2 ); 1883#else // _XM_VMX128_INTRINSICS_ 1884#endif // _XM_VMX128_INTRINSICS_ 1885} 1886 1887//------------------------------------------------------------------------------ 1888 1889XMFINLINE XMVECTOR XMVectorInBounds 1890( 1891 FXMVECTOR V, 1892 FXMVECTOR Bounds 1893) 1894{ 1895#if defined(_XM_NO_INTRINSICS_) 1896 1897 XMVECTOR Control; 1898 Control.vector4_u32[0] = (V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) ? 0xFFFFFFFF : 0; 1899 Control.vector4_u32[1] = (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) ? 0xFFFFFFFF : 0; 1900 Control.vector4_u32[2] = (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) ? 0xFFFFFFFF : 0; 1901 Control.vector4_u32[3] = (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]) ? 0xFFFFFFFF : 0; 1902 return Control; 1903 1904#elif defined(_XM_SSE_INTRINSICS_) 1905 // Test if less than or equal 1906 XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds); 1907 // Negate the bounds 1908 XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne); 1909 // Test if greater or equal (Reversed) 1910 vTemp2 = _mm_cmple_ps(vTemp2,V); 1911 // Blend answers 1912 vTemp1 = _mm_and_ps(vTemp1,vTemp2); 1913 return vTemp1; 1914#else // _XM_VMX128_INTRINSICS_ 1915#endif // _XM_VMX128_INTRINSICS_ 1916} 1917 1918//------------------------------------------------------------------------------ 1919 1920XMFINLINE XMVECTOR XMVectorInBoundsR 1921( 1922 UINT* pCR, 1923 FXMVECTOR V, 1924 FXMVECTOR Bounds 1925) 1926{ 1927#if defined(_XM_NO_INTRINSICS_) 1928 UINT ux, uy, uz, uw, CR; 1929 XMVECTOR Control; 1930 1931 XMASSERT( pCR != 0 ); 1932 1933 ux = (V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) ? 0xFFFFFFFFU : 0; 1934 uy = (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) ? 0xFFFFFFFFU : 0; 1935 uz = (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) ? 0xFFFFFFFFU : 0; 1936 uw = (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]) ? 
0xFFFFFFFFU : 0; 1937 1938 CR = 0; 1939 1940 if (ux&uy&uz&uw) 1941 { 1942 // All elements are in bounds 1943 CR = XM_CRMASK_CR6BOUNDS; 1944 } 1945 *pCR = CR; 1946 Control.vector4_u32[0] = ux; 1947 Control.vector4_u32[1] = uy; 1948 Control.vector4_u32[2] = uz; 1949 Control.vector4_u32[3] = uw; 1950 return Control; 1951 1952#elif defined(_XM_SSE_INTRINSICS_) 1953 XMASSERT( pCR != 0 ); 1954 // Test if less than or equal 1955 XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds); 1956 // Negate the bounds 1957 XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne); 1958 // Test if greater or equal (Reversed) 1959 vTemp2 = _mm_cmple_ps(vTemp2,V); 1960 // Blend answers 1961 vTemp1 = _mm_and_ps(vTemp1,vTemp2); 1962 1963 UINT CR = 0; 1964 if (_mm_movemask_ps(vTemp1)==0xf) { 1965 // All elements are in bounds 1966 CR = XM_CRMASK_CR6BOUNDS; 1967 } 1968 *pCR = CR; 1969 return vTemp1; 1970#else // _XM_VMX128_INTRINSICS_ 1971#endif // _XM_VMX128_INTRINSICS_ 1972} 1973 1974//------------------------------------------------------------------------------ 1975 1976XMFINLINE XMVECTOR XMVectorIsNaN 1977( 1978 FXMVECTOR V 1979) 1980{ 1981#if defined(_XM_NO_INTRINSICS_) 1982 1983 XMVECTOR Control; 1984 Control.vector4_u32[0] = XMISNAN(V.vector4_f32[0]) ? 0xFFFFFFFFU : 0; 1985 Control.vector4_u32[1] = XMISNAN(V.vector4_f32[1]) ? 0xFFFFFFFFU : 0; 1986 Control.vector4_u32[2] = XMISNAN(V.vector4_f32[2]) ? 0xFFFFFFFFU : 0; 1987 Control.vector4_u32[3] = XMISNAN(V.vector4_f32[3]) ? 0xFFFFFFFFU : 0; 1988 return Control; 1989 1990#elif defined(_XM_SSE_INTRINSICS_) 1991 // Mask off the exponent 1992 __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity); 1993 // Mask off the mantissa 1994 __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest); 1995 // Are any of the exponents == 0x7F800000? 1996 vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity); 1997 // Are any of the mantissas zero? (SSE2 doesn't have a neq test) 1998 vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero); 1999 // Perform a not on the NaN test to be true on NON-zero mantissas 2000 vTempNan = _mm_andnot_si128(vTempNan,vTempInf); 2001 // Lanes that hold NaN are all-ones after the merge above 2002 return reinterpret_cast<const XMVECTOR *>(&vTempNan)[0]; 2003#else // _XM_VMX128_INTRINSICS_ 2004#endif // _XM_VMX128_INTRINSICS_ 2005} 2006 2007//------------------------------------------------------------------------------ 2008 2009XMFINLINE XMVECTOR XMVectorIsInfinite 2010( 2011 FXMVECTOR V 2012) 2013{ 2014#if defined(_XM_NO_INTRINSICS_) 2015 2016 XMVECTOR Control; 2017 Control.vector4_u32[0] = XMISINF(V.vector4_f32[0]) ? 0xFFFFFFFFU : 0; 2018 Control.vector4_u32[1] = XMISINF(V.vector4_f32[1]) ? 0xFFFFFFFFU : 0; 2019 Control.vector4_u32[2] = XMISINF(V.vector4_f32[2]) ? 0xFFFFFFFFU : 0; 2020 Control.vector4_u32[3] = XMISINF(V.vector4_f32[3]) ? 0xFFFFFFFFU : 0; 2021 return Control; 2022 2023#elif defined(_XM_SSE_INTRINSICS_) 2024 // Mask off the sign bit 2025 __m128 vTemp = _mm_and_ps(V,g_XMAbsMask); 2026 // Compare to infinity 2027 vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity); 2028 // Lanes holding infinity are all-ones in the result.
2029 return vTemp; 2030#else // _XM_VMX128_INTRINSICS_ 2031#endif // _XM_VMX128_INTRINSICS_ 2032} 2033 2034//------------------------------------------------------------------------------ 2035// Rounding and clamping operations 2036//------------------------------------------------------------------------------ 2037 2038//------------------------------------------------------------------------------ 2039 2040XMFINLINE XMVECTOR XMVectorMin 2041( 2042 FXMVECTOR V1, 2043 FXMVECTOR V2 2044) 2045{ 2046#if defined(_XM_NO_INTRINSICS_) 2047 2048 XMVECTOR Result; 2049 Result.vector4_f32[0] = (V1.vector4_f32[0] < V2.vector4_f32[0]) ? V1.vector4_f32[0] : V2.vector4_f32[0]; 2050 Result.vector4_f32[1] = (V1.vector4_f32[1] < V2.vector4_f32[1]) ? V1.vector4_f32[1] : V2.vector4_f32[1]; 2051 Result.vector4_f32[2] = (V1.vector4_f32[2] < V2.vector4_f32[2]) ? V1.vector4_f32[2] : V2.vector4_f32[2]; 2052 Result.vector4_f32[3] = (V1.vector4_f32[3] < V2.vector4_f32[3]) ? V1.vector4_f32[3] : V2.vector4_f32[3]; 2053 return Result; 2054 2055#elif defined(_XM_SSE_INTRINSICS_) 2056 return _mm_min_ps( V1, V2 ); 2057#else // _XM_VMX128_INTRINSICS_ 2058#endif // _XM_VMX128_INTRINSICS_ 2059} 2060 2061//------------------------------------------------------------------------------ 2062 2063XMFINLINE XMVECTOR XMVectorMax 2064( 2065 FXMVECTOR V1, 2066 FXMVECTOR V2 2067) 2068{ 2069#if defined(_XM_NO_INTRINSICS_) 2070 2071 XMVECTOR Result; 2072 Result.vector4_f32[0] = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? V1.vector4_f32[0] : V2.vector4_f32[0]; 2073 Result.vector4_f32[1] = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? V1.vector4_f32[1] : V2.vector4_f32[1]; 2074 Result.vector4_f32[2] = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? V1.vector4_f32[2] : V2.vector4_f32[2]; 2075 Result.vector4_f32[3] = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 
V1.vector4_f32[3] : V2.vector4_f32[3]; 2076 return Result; 2077 2078#elif defined(_XM_SSE_INTRINSICS_) 2079 return _mm_max_ps( V1, V2 ); 2080#else // _XM_VMX128_INTRINSICS_ 2081#endif // _XM_VMX128_INTRINSICS_ 2082} 2083 2084//------------------------------------------------------------------------------ 2085 2086XMFINLINE XMVECTOR XMVectorRound 2087( 2088 FXMVECTOR V 2089) 2090{ 2091#if defined(_XM_NO_INTRINSICS_) 2092 2093 XMVECTOR Result; 2094 XMVECTOR Bias; 2095 CONST XMVECTOR Zero = XMVectorZero(); 2096 CONST XMVECTOR BiasPos = XMVectorReplicate(0.5f); 2097 CONST XMVECTOR BiasNeg = XMVectorReplicate(-0.5f); 2098 2099 Bias = XMVectorLess(V, Zero); 2100 Bias = XMVectorSelect(BiasPos, BiasNeg, Bias); 2101 Result = XMVectorAdd(V, Bias); 2102 Result = XMVectorTruncate(Result); 2103 2104 return Result; 2105 2106#elif defined(_XM_SSE_INTRINSICS_) 2107 // To handle NAN, INF and numbers greater than 8388608, use masking 2108 // Get the abs value 2109 __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask); 2110 // Test if abs(V) is less than 8388608 (all floats at or above 8388608, plus NAN and INF, have no fractional part) 2111 vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction); 2112 // Convert to int and back to float for rounding 2113 __m128i vInt = _mm_cvtps_epi32(V); 2114 // Convert back to floats 2115 XMVECTOR vResult = _mm_cvtepi32_ps(vInt); 2116 // All numbers less than 8388608 will use the round to int 2117 vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]); 2118 // All others, use the ORIGINAL value 2119 vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]); 2120 vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]); 2121 return vResult; 2122#else // _XM_VMX128_INTRINSICS_ 2123#endif // _XM_VMX128_INTRINSICS_ 2124} 2125 2126//------------------------------------------------------------------------------ 2127 2128XMFINLINE XMVECTOR XMVectorTruncate 2129( 2130 FXMVECTOR V 2131) 2132{ 2133#if defined(_XM_NO_INTRINSICS_) 2134 XMVECTOR Result; 2135 UINT i; 2136 2137 // Avoid C4701 2138 Result.vector4_f32[0] = 0.0f; 2139 2140 for (i = 0; i < 4; i++) 2141 { 2142 if (XMISNAN(V.vector4_f32[i])) 2143 { 2144 Result.vector4_u32[i] = 0x7FC00000; 2145 } 2146 else if (fabsf(V.vector4_f32[i]) < 8388608.0f) 2147 { 2148 Result.vector4_f32[i] = (FLOAT)((INT)V.vector4_f32[i]); 2149 } 2150 else 2151 { 2152 Result.vector4_f32[i] = V.vector4_f32[i]; 2153 } 2154 } 2155 return Result; 2156 2157#elif defined(_XM_SSE_INTRINSICS_) 2158 // To handle NAN, INF and numbers greater than 8388608, use masking 2159 // Get the abs value 2160 __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask); 2161 // Test if abs(V) is less than 8388608 (all floats at or above 8388608, plus NAN and INF, have no fractional part) 2162 vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction); 2163 // Convert to int and back to float for rounding with truncation 2164 __m128i vInt = _mm_cvttps_epi32(V); 2165 // Convert back to floats 2166 XMVECTOR vResult = _mm_cvtepi32_ps(vInt); 2167 // All numbers less than 8388608 will use the round to int 2168 vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]); 2169 // All others, use the ORIGINAL value 2170 vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]); 2171 vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]); 2172 return vResult; 2173#else // _XM_VMX128_INTRINSICS_ 2174#endif // _XM_VMX128_INTRINSICS_ 2175} 2176
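//------------------------------------------------------------------------------
// Example (illustrative sketch, not a function of this library): the two
// rounding helpers above differ in how they treat the fractional part.
// Expected results on the scalar _XM_NO_INTRINSICS_ path:
//
//   XMVECTOR v = XMVectorSet( 2.7f, -2.7f, 2.5f, -2.5f );
//   XMVECTOR r = XMVectorRound(v);    // { 3.0f, -3.0f, 3.0f, -3.0f } (bias by +/-0.5, then truncate)
//   XMVECTOR t = XMVectorTruncate(v); // { 2.0f, -2.0f, 2.0f, -2.0f } (always toward zero)
//
// Note that the SSE path of XMVectorRound relies on _mm_cvtps_epi32, whose
// default round-to-nearest-even mode can differ on exact halves (2.5f -> 2.0f
// rather than 3.0f).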
2177//------------------------------------------------------------------------------ 2178 2179XMFINLINE XMVECTOR XMVectorFloor 2180( 2181 FXMVECTOR V 2182) 2183{ 2184#if defined(_XM_NO_INTRINSICS_) 2185 2186 XMVECTOR vResult = { 2187 floorf(V.vector4_f32[0]), 2188 floorf(V.vector4_f32[1]), 2189 floorf(V.vector4_f32[2]), 2190 floorf(V.vector4_f32[3]) 2191 }; 2192 return vResult; 2193 2194#elif defined(_XM_SSE_INTRINSICS_) 2195 XMVECTOR vResult = _mm_sub_ps(V,g_XMOneHalfMinusEpsilon); 2196 __m128i vInt = _mm_cvtps_epi32(vResult); 2197 vResult = _mm_cvtepi32_ps(vInt); 2198 return vResult; 2199#else // _XM_VMX128_INTRINSICS_ 2200#endif // _XM_VMX128_INTRINSICS_ 2201} 2202 2203//------------------------------------------------------------------------------ 2204 2205XMFINLINE XMVECTOR XMVectorCeiling 2206( 2207 FXMVECTOR V 2208) 2209{ 2210#if defined(_XM_NO_INTRINSICS_) 2211 XMVECTOR vResult = { 2212 ceilf(V.vector4_f32[0]), 2213 ceilf(V.vector4_f32[1]), 2214 ceilf(V.vector4_f32[2]), 2215 ceilf(V.vector4_f32[3]) 2216 }; 2217 return vResult; 2218 2219#elif defined(_XM_SSE_INTRINSICS_) 2220 XMVECTOR vResult = _mm_add_ps(V,g_XMOneHalfMinusEpsilon); 2221 __m128i vInt = _mm_cvtps_epi32(vResult); 2222 vResult = _mm_cvtepi32_ps(vInt); 2223 return vResult; 2224#else // _XM_VMX128_INTRINSICS_ 2225#endif // _XM_VMX128_INTRINSICS_ 2226} 2227 2228//------------------------------------------------------------------------------ 2229 2230XMFINLINE XMVECTOR XMVectorClamp 2231( 2232 FXMVECTOR V, 2233 FXMVECTOR Min, 2234 FXMVECTOR Max 2235) 2236{ 2237#if defined(_XM_NO_INTRINSICS_) 2238 2239 XMVECTOR Result; 2240 2241 XMASSERT(XMVector4LessOrEqual(Min, Max)); 2242 2243 Result = XMVectorMax(Min, V); 2244 Result = XMVectorMin(Max, Result); 2245 2246 return Result; 2247 2248#elif defined(_XM_SSE_INTRINSICS_) 2249 XMVECTOR vResult; 2250 XMASSERT(XMVector4LessOrEqual(Min, Max)); 2251 vResult = _mm_max_ps(Min,V); 2252 vResult = _mm_min_ps(vResult,Max); 2253 return vResult; 2254#else // _XM_VMX128_INTRINSICS_ 2255#endif // _XM_VMX128_INTRINSICS_ 2256} 2257 2258//------------------------------------------------------------------------------ 2259 2260XMFINLINE XMVECTOR XMVectorSaturate 2261( 2262 FXMVECTOR V 2263) 2264{ 2265#if defined(_XM_NO_INTRINSICS_) 2266 2267 CONST XMVECTOR Zero = XMVectorZero(); 2268 2269 return XMVectorClamp(V, Zero, g_XMOne.v); 2270 2271#elif defined(_XM_SSE_INTRINSICS_) 2272 // Set <0 to 0 2273 XMVECTOR vResult = _mm_max_ps(V,g_XMZero); 2274 // Set >1 to 1 2275 return _mm_min_ps(vResult,g_XMOne); 2276#else // _XM_VMX128_INTRINSICS_ 2277#endif // _XM_VMX128_INTRINSICS_ 2278} 2279 2280//------------------------------------------------------------------------------ 2281// Bitwise logical operations 2282//------------------------------------------------------------------------------ 2283 2284XMFINLINE XMVECTOR XMVectorAndInt 2285( 2286 FXMVECTOR V1, 2287 FXMVECTOR V2 2288) 2289{ 2290#if defined(_XM_NO_INTRINSICS_) 2291 2292 XMVECTOR Result; 2293 2294 Result.vector4_u32[0] = V1.vector4_u32[0] & V2.vector4_u32[0]; 2295 Result.vector4_u32[1] = V1.vector4_u32[1] & V2.vector4_u32[1]; 2296 Result.vector4_u32[2] = V1.vector4_u32[2] & V2.vector4_u32[2]; 2297 Result.vector4_u32[3] = V1.vector4_u32[3] & V2.vector4_u32[3]; 2298 return Result; 2299 2300#elif defined(_XM_SSE_INTRINSICS_) 2301 return _mm_and_ps(V1,V2); 2302#else // _XM_VMX128_INTRINSICS_ 2303#endif // _XM_VMX128_INTRINSICS_ 2304} 2305 2306//------------------------------------------------------------------------------ 2307 2308XMFINLINE XMVECTOR
XMVectorAndCInt 2309( 2310 FXMVECTOR V1, 2311 FXMVECTOR V2 2312) 2313{ 2314#if defined(_XM_NO_INTRINSICS_) 2315 2316 XMVECTOR Result; 2317 2318 Result.vector4_u32[0] = V1.vector4_u32[0] & ~V2.vector4_u32[0]; 2319 Result.vector4_u32[1] = V1.vector4_u32[1] & ~V2.vector4_u32[1]; 2320 Result.vector4_u32[2] = V1.vector4_u32[2] & ~V2.vector4_u32[2]; 2321 Result.vector4_u32[3] = V1.vector4_u32[3] & ~V2.vector4_u32[3]; 2322 2323 return Result; 2324 2325#elif defined(_XM_SSE_INTRINSICS_) 2326 __m128i V = _mm_andnot_si128( reinterpret_cast<const __m128i *>(&V2)[0], reinterpret_cast<const __m128i *>(&V1)[0] ); 2327 return reinterpret_cast<__m128 *>(&V)[0]; 2328#else // _XM_VMX128_INTRINSICS_ 2329#endif // _XM_VMX128_INTRINSICS_ 2330} 2331 2332//------------------------------------------------------------------------------ 2333 2334XMFINLINE XMVECTOR XMVectorOrInt 2335( 2336 FXMVECTOR V1, 2337 FXMVECTOR V2 2338) 2339{ 2340#if defined(_XM_NO_INTRINSICS_) 2341 2342 XMVECTOR Result; 2343 2344 Result.vector4_u32[0] = V1.vector4_u32[0] | V2.vector4_u32[0]; 2345 Result.vector4_u32[1] = V1.vector4_u32[1] | V2.vector4_u32[1]; 2346 Result.vector4_u32[2] = V1.vector4_u32[2] | V2.vector4_u32[2]; 2347 Result.vector4_u32[3] = V1.vector4_u32[3] | V2.vector4_u32[3]; 2348 2349 return Result; 2350 2351#elif defined(_XM_SSE_INTRINSICS_) 2352 __m128i V = _mm_or_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] ); 2353 return reinterpret_cast<__m128 *>(&V)[0]; 2354#else // _XM_VMX128_INTRINSICS_ 2355#endif // _XM_VMX128_INTRINSICS_ 2356} 2357 2358//------------------------------------------------------------------------------ 2359 2360XMFINLINE XMVECTOR XMVectorNorInt 2361( 2362 FXMVECTOR V1, 2363 FXMVECTOR V2 2364) 2365{ 2366#if defined(_XM_NO_INTRINSICS_) 2367 2368 XMVECTOR Result; 2369 2370 Result.vector4_u32[0] = ~(V1.vector4_u32[0] | V2.vector4_u32[0]); 2371 Result.vector4_u32[1] = ~(V1.vector4_u32[1] | V2.vector4_u32[1]); 2372 Result.vector4_u32[2] = ~(V1.vector4_u32[2] | V2.vector4_u32[2]); 2373 Result.vector4_u32[3] = ~(V1.vector4_u32[3] | V2.vector4_u32[3]); 2374 2375 return Result; 2376 2377#elif defined(_XM_SSE_INTRINSICS_) 2378 __m128i Result; 2379 Result = _mm_or_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] ); 2380 Result = _mm_andnot_si128( Result,g_XMNegOneMask); 2381 return reinterpret_cast<__m128 *>(&Result)[0]; 2382#else // _XM_VMX128_INTRINSICS_ 2383#endif // _XM_VMX128_INTRINSICS_ 2384} 2385 2386//------------------------------------------------------------------------------ 2387 2388XMFINLINE XMVECTOR XMVectorXorInt 2389( 2390 FXMVECTOR V1, 2391 FXMVECTOR V2 2392) 2393{ 2394#if defined(_XM_NO_INTRINSICS_) 2395 2396 XMVECTOR Result; 2397 2398 Result.vector4_u32[0] = V1.vector4_u32[0] ^ V2.vector4_u32[0]; 2399 Result.vector4_u32[1] = V1.vector4_u32[1] ^ V2.vector4_u32[1]; 2400 Result.vector4_u32[2] = V1.vector4_u32[2] ^ V2.vector4_u32[2]; 2401 Result.vector4_u32[3] = V1.vector4_u32[3] ^ V2.vector4_u32[3]; 2402 2403 return Result; 2404 2405#elif defined(_XM_SSE_INTRINSICS_) 2406 __m128i V = _mm_xor_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] ); 2407 return reinterpret_cast<__m128 *>(&V)[0]; 2408#else // _XM_VMX128_INTRINSICS_ 2409#endif // _XM_VMX128_INTRINSICS_ 2410} 2411 2412//------------------------------------------------------------------------------ 2413// Computation operations 2414//------------------------------------------------------------------------------ 
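// Note (illustrative sketch, not a function of this library): the bitwise
// operations above compose into a branchless per-lane select, which is
// essentially what XMVectorSelect provides. A minimal sketch, given any
// comparison mask:
//
//   XMVECTOR vMask = XMVectorGreater( a, b );  // all-ones where a > b
//   XMVECTOR vMax  = XMVectorOrInt(
//       XMVectorAndCInt( b, vMask ),           // b where the mask is clear
//       XMVectorAndInt( a, vMask ) );          // a where the mask is set
//   // vMax equals the component-wise maximum, i.e. XMVectorMax( a, b ).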
2415 2416//------------------------------------------------------------------------------ 2417 2418XMFINLINE XMVECTOR XMVectorNegate 2419( 2420 FXMVECTOR V 2421) 2422{ 2423#if defined(_XM_NO_INTRINSICS_) 2424 2425 XMVECTOR Result; 2426 2427 Result.vector4_f32[0] = -V.vector4_f32[0]; 2428 Result.vector4_f32[1] = -V.vector4_f32[1]; 2429 Result.vector4_f32[2] = -V.vector4_f32[2]; 2430 Result.vector4_f32[3] = -V.vector4_f32[3]; 2431 2432 return Result; 2433 2434#elif defined(_XM_SSE_INTRINSICS_) 2435 XMVECTOR Z; 2436 2437 Z = _mm_setzero_ps(); 2438 2439 return _mm_sub_ps( Z, V ); 2440#else // _XM_VMX128_INTRINSICS_ 2441#endif // _XM_VMX128_INTRINSICS_ 2442} 2443 2444//------------------------------------------------------------------------------ 2445 2446XMFINLINE XMVECTOR XMVectorAdd 2447( 2448 FXMVECTOR V1, 2449 FXMVECTOR V2 2450) 2451{ 2452#if defined(_XM_NO_INTRINSICS_) 2453 2454 XMVECTOR Result; 2455 2456 Result.vector4_f32[0] = V1.vector4_f32[0] + V2.vector4_f32[0]; 2457 Result.vector4_f32[1] = V1.vector4_f32[1] + V2.vector4_f32[1]; 2458 Result.vector4_f32[2] = V1.vector4_f32[2] + V2.vector4_f32[2]; 2459 Result.vector4_f32[3] = V1.vector4_f32[3] + V2.vector4_f32[3]; 2460 2461 return Result; 2462 2463#elif defined(_XM_SSE_INTRINSICS_) 2464 return _mm_add_ps( V1, V2 ); 2465#else // _XM_VMX128_INTRINSICS_ 2466#endif // _XM_VMX128_INTRINSICS_ 2467} 2468 2469//------------------------------------------------------------------------------ 2470 2471XMFINLINE XMVECTOR XMVectorAddAngles 2472( 2473 FXMVECTOR V1, 2474 FXMVECTOR V2 2475) 2476{ 2477#if defined(_XM_NO_INTRINSICS_) 2478 2479 XMVECTOR Mask; 2480 XMVECTOR Offset; 2481 XMVECTOR Result; 2482 CONST XMVECTOR Zero = XMVectorZero(); 2483 2484 // Add the given angles together. If the range of V1 is such 2485 // that -Pi <= V1 < Pi and the range of V2 is such that 2486 // -2Pi <= V2 <= 2Pi, then the range of the resulting angle 2487 // will be -Pi <= Result < Pi. 2488 Result = XMVectorAdd(V1, V2); 2489 2490 Mask = XMVectorLess(Result, g_XMNegativePi.v); 2491 Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask); 2492 2493 Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v); 2494 Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask); 2495 2496 Result = XMVectorAdd(Result, Offset); 2497 2498 return Result; 2499 2500#elif defined(_XM_SSE_INTRINSICS_) 2501 // Adjust the angles 2502 XMVECTOR vResult = _mm_add_ps(V1,V2); 2503 // Less than -Pi? 2504 XMVECTOR vOffset = _mm_cmplt_ps(vResult,g_XMNegativePi); 2505 vOffset = _mm_and_ps(vOffset,g_XMTwoPi); 2506 // Add 2Pi to all entries less than -Pi 2507 vResult = _mm_add_ps(vResult,vOffset); 2508 // Greater than or equal to Pi? 2509 vOffset = _mm_cmpge_ps(vResult,g_XMPi); 2510 vOffset = _mm_and_ps(vOffset,g_XMTwoPi); 2511 // Subtract 2Pi from all entries greater than or equal to Pi 2512 vResult = _mm_sub_ps(vResult,vOffset); 2513 return vResult; 2514#else // _XM_VMX128_INTRINSICS_ 2515#endif // _XM_VMX128_INTRINSICS_ 2516}
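//------------------------------------------------------------------------------
// Example (illustrative sketch, not a function of this library):
// XMVectorAddAngles above and XMVectorSubtractAngles below keep accumulated
// rotations wrapped to [-Pi, Pi) without a transcendental call:
//
//   XMVECTOR vHeading = XMVectorReplicate( 3.0f );  // close to +Pi
//   XMVECTOR vTurn    = XMVectorReplicate( 0.5f );
//   XMVECTOR vWrapped = XMVectorAddAngles( vHeading, vTurn );
//   // 3.5 >= Pi, so 2Pi is subtracted: each lane is roughly -2.783f.
//
// The documented precondition still applies: V1 must lie in [-Pi, Pi) and V2
// in [-2Pi, 2Pi]; reduce other inputs with XMVectorModAngles first.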
2517 2518//------------------------------------------------------------------------------ 2519 2520XMFINLINE XMVECTOR XMVectorSubtract 2521( 2522 FXMVECTOR V1, 2523 FXMVECTOR V2 2524) 2525{ 2526#if defined(_XM_NO_INTRINSICS_) 2527 2528 XMVECTOR Result; 2529 2530 Result.vector4_f32[0] = V1.vector4_f32[0] - V2.vector4_f32[0]; 2531 Result.vector4_f32[1] = V1.vector4_f32[1] - V2.vector4_f32[1]; 2532 Result.vector4_f32[2] = V1.vector4_f32[2] - V2.vector4_f32[2]; 2533 Result.vector4_f32[3] = V1.vector4_f32[3] - V2.vector4_f32[3]; 2534 2535 return Result; 2536 2537#elif defined(_XM_SSE_INTRINSICS_) 2538 return _mm_sub_ps( V1, V2 ); 2539#else // _XM_VMX128_INTRINSICS_ 2540#endif // _XM_VMX128_INTRINSICS_ 2541} 2542 2543//------------------------------------------------------------------------------ 2544 2545XMFINLINE XMVECTOR XMVectorSubtractAngles 2546( 2547 FXMVECTOR V1, 2548 FXMVECTOR V2 2549) 2550{ 2551#if defined(_XM_NO_INTRINSICS_) 2552 2553 XMVECTOR Mask; 2554 XMVECTOR Offset; 2555 XMVECTOR Result; 2556 CONST XMVECTOR Zero = XMVectorZero(); 2557 2558 // Subtract the given angles. If the range of V1 is such 2559 // that -Pi <= V1 < Pi and the range of V2 is such that 2560 // -2Pi <= V2 <= 2Pi, then the range of the resulting angle 2561 // will be -Pi <= Result < Pi. 2562 Result = XMVectorSubtract(V1, V2); 2563 2564 Mask = XMVectorLess(Result, g_XMNegativePi.v); 2565 Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask); 2566 2567 Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v); 2568 Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask); 2569 2570 Result = XMVectorAdd(Result, Offset); 2571 2572 return Result; 2573 2574#elif defined(_XM_SSE_INTRINSICS_) 2575 // Adjust the angles 2576 XMVECTOR vResult = _mm_sub_ps(V1,V2); 2577 // Less than -Pi? 2578 XMVECTOR vOffset = _mm_cmplt_ps(vResult,g_XMNegativePi); 2579 vOffset = _mm_and_ps(vOffset,g_XMTwoPi); 2580 // Add 2Pi to all entries less than -Pi 2581 vResult = _mm_add_ps(vResult,vOffset); 2582 // Greater than or equal to Pi?
2583 vOffset = _mm_cmpge_ps(vResult,g_XMPi); 2584 vOffset = _mm_and_ps(vOffset,g_XMTwoPi); 2585 // Subtract 2Pi from all entries greater than or equal to Pi 2586 vResult = _mm_sub_ps(vResult,vOffset); 2587 return vResult; 2588#else // _XM_VMX128_INTRINSICS_ 2589#endif // _XM_VMX128_INTRINSICS_ 2590} 2591 2592//------------------------------------------------------------------------------ 2593 2594XMFINLINE XMVECTOR XMVectorMultiply 2595( 2596 FXMVECTOR V1, 2597 FXMVECTOR V2 2598) 2599{ 2600#if defined(_XM_NO_INTRINSICS_) 2601 XMVECTOR Result = { 2602 V1.vector4_f32[0] * V2.vector4_f32[0], 2603 V1.vector4_f32[1] * V2.vector4_f32[1], 2604 V1.vector4_f32[2] * V2.vector4_f32[2], 2605 V1.vector4_f32[3] * V2.vector4_f32[3] 2606 }; 2607 return Result; 2608#elif defined(_XM_SSE_INTRINSICS_) 2609 return _mm_mul_ps( V1, V2 ); 2610#else // _XM_VMX128_INTRINSICS_ 2611#endif // _XM_VMX128_INTRINSICS_ 2612} 2613 2614//------------------------------------------------------------------------------ 2615 2616XMFINLINE XMVECTOR XMVectorMultiplyAdd 2617( 2618 FXMVECTOR V1, 2619 FXMVECTOR V2, 2620 FXMVECTOR V3 2621) 2622{ 2623#if defined(_XM_NO_INTRINSICS_) 2624 XMVECTOR vResult = { 2625 (V1.vector4_f32[0] * V2.vector4_f32[0]) + V3.vector4_f32[0], 2626 (V1.vector4_f32[1] * V2.vector4_f32[1]) + V3.vector4_f32[1], 2627 (V1.vector4_f32[2] * V2.vector4_f32[2]) + V3.vector4_f32[2], 2628 (V1.vector4_f32[3] * V2.vector4_f32[3]) + V3.vector4_f32[3] 2629 }; 2630 return vResult; 2631 2632#elif defined(_XM_SSE_INTRINSICS_) 2633 XMVECTOR vResult = _mm_mul_ps( V1, V2 ); 2634 return _mm_add_ps(vResult, V3 ); 2635#else // _XM_VMX128_INTRINSICS_ 2636#endif // _XM_VMX128_INTRINSICS_ 2637} 2638 2639//------------------------------------------------------------------------------ 2640 2641XMFINLINE XMVECTOR XMVectorDivide 2642( 2643 FXMVECTOR V1, 2644 FXMVECTOR V2 2645) 2646{ 2647#if defined(_XM_NO_INTRINSICS_) 2648 XMVECTOR Result; 2649 Result.vector4_f32[0] = V1.vector4_f32[0] / V2.vector4_f32[0]; 2650 Result.vector4_f32[1] = V1.vector4_f32[1] / V2.vector4_f32[1]; 2651 Result.vector4_f32[2] = V1.vector4_f32[2] / V2.vector4_f32[2]; 2652 Result.vector4_f32[3] = V1.vector4_f32[3] / V2.vector4_f32[3]; 2653 return Result; 2654#elif defined(_XM_SSE_INTRINSICS_) 2655 return _mm_div_ps( V1, V2 ); 2656#else // _XM_VMX128_INTRINSICS_ 2657#endif // _XM_VMX128_INTRINSICS_ 2658} 2659 2660//------------------------------------------------------------------------------ 2661 2662XMFINLINE XMVECTOR XMVectorNegativeMultiplySubtract 2663( 2664 FXMVECTOR V1, 2665 FXMVECTOR V2, 2666 FXMVECTOR V3 2667) 2668{ 2669#if defined(_XM_NO_INTRINSICS_) 2670 2671 XMVECTOR vResult = { 2672 V3.vector4_f32[0] - (V1.vector4_f32[0] * V2.vector4_f32[0]), 2673 V3.vector4_f32[1] - (V1.vector4_f32[1] * V2.vector4_f32[1]), 2674 V3.vector4_f32[2] - (V1.vector4_f32[2] * V2.vector4_f32[2]), 2675 V3.vector4_f32[3] - (V1.vector4_f32[3] * V2.vector4_f32[3]) 2676 }; 2677 return vResult; 2678 2679#elif defined(_XM_SSE_INTRINSICS_) 2680 XMVECTOR R = _mm_mul_ps( V1, V2 ); 2681 return _mm_sub_ps( V3, R ); 2682#else // _XM_VMX128_INTRINSICS_ 2683#endif // _XM_VMX128_INTRINSICS_ 2684} 2685 2686//------------------------------------------------------------------------------ 2687 2688XMFINLINE XMVECTOR XMVectorScale 2689( 2690 FXMVECTOR V, 2691 FLOAT ScaleFactor 2692) 2693{ 2694#if defined(_XM_NO_INTRINSICS_) 2695 XMVECTOR vResult = { 2696 V.vector4_f32[0] * ScaleFactor, 2697 V.vector4_f32[1] * ScaleFactor, 2698 V.vector4_f32[2] * ScaleFactor, 2699 V.vector4_f32[3] * ScaleFactor 2700 }; 2701 return
vResult; 2702 2703#elif defined(_XM_SSE_INTRINSICS_) 2704 XMVECTOR vResult = _mm_set_ps1(ScaleFactor); 2705 return _mm_mul_ps(vResult,V); 2706#else // _XM_VMX128_INTRINSICS_ 2707#endif // _XM_VMX128_INTRINSICS_ 2708} 2709 2710//------------------------------------------------------------------------------ 2711 2712XMFINLINE XMVECTOR XMVectorReciprocalEst 2713( 2714 FXMVECTOR V 2715) 2716{ 2717#if defined(_XM_NO_INTRINSICS_) 2718 XMVECTOR Result; 2719 UINT i; 2720 2721 // Avoid C4701 2722 Result.vector4_f32[0] = 0.0f; 2723 2724 for (i = 0; i < 4; i++) 2725 { 2726 if (XMISNAN(V.vector4_f32[i])) 2727 { 2728 Result.vector4_u32[i] = 0x7FC00000; 2729 } 2730 else if (V.vector4_f32[i] == 0.0f || V.vector4_f32[i] == -0.0f) 2731 { 2732 Result.vector4_u32[i] = 0x7F800000 | (V.vector4_u32[i] & 0x80000000); 2733 } 2734 else 2735 { 2736 Result.vector4_f32[i] = 1.f / V.vector4_f32[i]; 2737 } 2738 } 2739 return Result; 2740 2741#elif defined(_XM_SSE_INTRINSICS_) 2742 return _mm_rcp_ps(V); 2743#else // _XM_VMX128_INTRINSICS_ 2744#endif // _XM_VMX128_INTRINSICS_ 2745} 2746 2747//------------------------------------------------------------------------------ 2748 2749XMFINLINE XMVECTOR XMVectorReciprocal 2750( 2751 FXMVECTOR V 2752) 2753{ 2754#if defined(_XM_NO_INTRINSICS_) 2755 return XMVectorReciprocalEst(V); 2756 2757#elif defined(_XM_SSE_INTRINSICS_) 2758 return _mm_div_ps(g_XMOne,V); 2759#else // _XM_VMX128_INTRINSICS_ 2760#endif // _XM_VMX128_INTRINSICS_ 2761} 2762 2763//------------------------------------------------------------------------------ 2764// Return an estimated square root 2765XMFINLINE XMVECTOR XMVectorSqrtEst 2766( 2767 FXMVECTOR V 2768) 2769{ 2770#if defined(_XM_NO_INTRINSICS_) 2771 XMVECTOR Select; 2772 2773 // if (x == +Infinity) sqrt(x) = +Infinity 2774 // if (x == +0.0f) sqrt(x) = +0.0f 2775 // if (x == -0.0f) sqrt(x) = -0.0f 2776 // if (x < 0.0f) sqrt(x) = QNaN 2777 2778 XMVECTOR Result = XMVectorReciprocalSqrtEst(V); 2779 XMVECTOR Zero = XMVectorZero(); 2780 XMVECTOR VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v); 2781 XMVECTOR VEqualsZero = XMVectorEqual(V, Zero); 2782 Result = XMVectorMultiply(V, Result); 2783 Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero); 2784 Result = XMVectorSelect(V, Result, Select); 2785 return Result; 2786 2787#elif defined(_XM_SSE_INTRINSICS_) 2788 return _mm_sqrt_ps(V); 2789#else // _XM_VMX128_INTRINSICS_ 2790#endif // _XM_VMX128_INTRINSICS_ 2791} 2792 2793//------------------------------------------------------------------------------ 2794 2795XMFINLINE XMVECTOR XMVectorSqrt 2796( 2797 FXMVECTOR V 2798) 2799{ 2800#if defined(_XM_NO_INTRINSICS_) 2801 2802 XMVECTOR Zero; 2803 XMVECTOR VEqualsInfinity, VEqualsZero; 2804 XMVECTOR Select; 2805 XMVECTOR Result; 2806 2807 // if (x == +Infinity) sqrt(x) = +Infinity 2808 // if (x == +0.0f) sqrt(x) = +0.0f 2809 // if (x == -0.0f) sqrt(x) = -0.0f 2810 // if (x < 0.0f) sqrt(x) = QNaN 2811 2812 Result = XMVectorReciprocalSqrt(V); 2813 Zero = XMVectorZero(); 2814 VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v); 2815 VEqualsZero = XMVectorEqual(V, Zero); 2816 Result = XMVectorMultiply(V, Result); 2817 Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero); 2818 Result = XMVectorSelect(V, Result, Select); 2819 2820 return Result; 2821 2822#elif defined(_XM_SSE_INTRINSICS_) 2823 return _mm_sqrt_ps(V); 2824#else // _XM_VMX128_INTRINSICS_ 2825#endif // _XM_VMX128_INTRINSICS_ 2826} 2827 2828//------------------------------------------------------------------------------ 2829
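// Note (illustrative sketch, not a function of this library): the *Est
// variants trade accuracy for speed; _mm_rcp_ps, for instance, is only
// accurate to roughly 12 bits. When a cheap estimate plus a little more
// accuracy is wanted, one Newton-Raphson step built from the operations
// above can refine it:
//
//   // r1 = r0 * (2 - v * r0), where r0 is the initial estimate
//   XMVECTOR vTwo = XMVectorReplicate( 2.0f );
//   XMVECTOR r0   = XMVectorReciprocalEst( v );
//   XMVECTOR r1   = XMVectorMultiply( r0,
//                       XMVectorNegativeMultiplySubtract( v, r0, vTwo ) );
//
// Each such step roughly doubles the number of correct bits.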
2830XMFINLINE XMVECTOR XMVectorReciprocalSqrtEst 2831( 2832 FXMVECTOR V 2833) 2834{ 2835#if defined(_XM_NO_INTRINSICS_) 2836 2837 // if (x == +Infinity) rsqrt(x) = 0 2838 // if (x == +0.0f) rsqrt(x) = +Infinity 2839 // if (x == -0.0f) rsqrt(x) = -Infinity 2840 // if (x < 0.0f) rsqrt(x) = QNaN 2841 2842 XMVECTOR Result; 2843 UINT i; 2844 2845 // Avoid C4701 2846 Result.vector4_f32[0] = 0.0f; 2847 2848 for (i = 0; i < 4; i++) 2849 { 2850 if (XMISNAN(V.vector4_f32[i])) 2851 { 2852 Result.vector4_u32[i] = 0x7FC00000; 2853 } 2854 else if (V.vector4_f32[i] == 0.0f || V.vector4_f32[i] == -0.0f) 2855 { 2856 Result.vector4_u32[i] = 0x7F800000 | (V.vector4_u32[i] & 0x80000000); 2857 } 2858 else if (V.vector4_f32[i] < 0.0f) 2859 { 2860 Result.vector4_u32[i] = 0x7FFFFFFF; 2861 } 2862 else if (XMISINF(V.vector4_f32[i])) 2863 { 2864 Result.vector4_f32[i] = 0.0f; 2865 } 2866 else 2867 { 2868 Result.vector4_f32[i] = 1.0f / sqrtf(V.vector4_f32[i]); 2869 } 2870 } 2871 2872 return Result; 2873 2874#elif defined(_XM_SSE_INTRINSICS_) 2875 return _mm_rsqrt_ps(V); 2876#else // _XM_VMX128_INTRINSICS_ 2877#endif // _XM_VMX128_INTRINSICS_ 2878} 2879 2880//------------------------------------------------------------------------------ 2881 2882XMFINLINE XMVECTOR XMVectorReciprocalSqrt 2883( 2884 FXMVECTOR V 2885) 2886{ 2887#if defined(_XM_NO_INTRINSICS_) 2888 2889 return XMVectorReciprocalSqrtEst(V); 2890 2891#elif defined(_XM_SSE_INTRINSICS_) 2892 XMVECTOR vResult = _mm_sqrt_ps(V); 2893 vResult = _mm_div_ps(g_XMOne,vResult); 2894 return vResult; 2895#else // _XM_VMX128_INTRINSICS_ 2896#endif // _XM_VMX128_INTRINSICS_ 2897} 2898 2899//------------------------------------------------------------------------------ 2900 2901XMFINLINE XMVECTOR XMVectorExpEst 2902( 2903 FXMVECTOR V 2904) 2905{ 2906#if defined(_XM_NO_INTRINSICS_) 2907 2908 XMVECTOR Result; 2909 Result.vector4_f32[0] = powf(2.0f, V.vector4_f32[0]); 2910 Result.vector4_f32[1] = powf(2.0f, V.vector4_f32[1]); 2911 Result.vector4_f32[2] = powf(2.0f, V.vector4_f32[2]); 2912 Result.vector4_f32[3] = powf(2.0f, V.vector4_f32[3]); 2913 return Result; 2914 2915#elif defined(_XM_SSE_INTRINSICS_) 2916 XMVECTOR vResult = _mm_setr_ps( 2917 powf(2.0f,XMVectorGetX(V)), 2918 powf(2.0f,XMVectorGetY(V)), 2919 powf(2.0f,XMVectorGetZ(V)), 2920 powf(2.0f,XMVectorGetW(V))); 2921 return vResult; 2922#else // _XM_VMX128_INTRINSICS_ 2923#endif // _XM_VMX128_INTRINSICS_ 2924} 2925 2926//------------------------------------------------------------------------------ 2927 2928XMINLINE XMVECTOR XMVectorExp 2929( 2930 FXMVECTOR V 2931) 2932{ 2933#if defined(_XM_NO_INTRINSICS_) 2934 2935 XMVECTOR E, S; 2936 XMVECTOR R, R2, R3, R4; 2937 XMVECTOR V0, V1; 2938 XMVECTOR C0X, C0Y, C0Z, C0W; 2939 XMVECTOR C1X, C1Y, C1Z, C1W; 2940 XMVECTOR Result; 2941 static CONST XMVECTOR C0 = {1.0f, -6.93147182e-1f, 2.40226462e-1f, -5.55036440e-2f}; 2942 static CONST XMVECTOR C1 = {9.61597636e-3f, -1.32823968e-3f, 1.47491097e-4f, -1.08635004e-5f}; 2943 2944 R = XMVectorFloor(V); 2945 E = XMVectorExpEst(R); 2946 R = XMVectorSubtract(V, R); 2947 R2 = XMVectorMultiply(R, R); 2948 R3 = XMVectorMultiply(R, R2); 2949 R4 = XMVectorMultiply(R2, R2); 2950 2951 C0X = XMVectorSplatX(C0); 2952 C0Y = XMVectorSplatY(C0); 2953 C0Z = XMVectorSplatZ(C0); 2954 C0W = XMVectorSplatW(C0); 2955 2956 C1X = XMVectorSplatX(C1); 2957 C1Y = XMVectorSplatY(C1); 2958 C1Z = XMVectorSplatZ(C1); 2959 C1W = XMVectorSplatW(C1); 2960 2961 V0 = XMVectorMultiplyAdd(R, C0Y, C0X); 2962 V0 = XMVectorMultiplyAdd(R2, C0Z, V0); 2963 V0 = 
XMVectorMultiplyAdd(R3, C0W, V0); 2964 2965 V1 = XMVectorMultiplyAdd(R, C1Y, C1X); 2966 V1 = XMVectorMultiplyAdd(R2, C1Z, V1); 2967 V1 = XMVectorMultiplyAdd(R3, C1W, V1); 2968 2969 S = XMVectorMultiplyAdd(R4, V1, V0); 2970 2971 S = XMVectorReciprocal(S); 2972 Result = XMVectorMultiply(E, S); 2973 2974 return Result; 2975 2976#elif defined(_XM_SSE_INTRINSICS_) 2977 static CONST XMVECTORF32 C0 = {1.0f, -6.93147182e-1f, 2.40226462e-1f, -5.55036440e-2f}; 2978 static CONST XMVECTORF32 C1 = {9.61597636e-3f, -1.32823968e-3f, 1.47491097e-4f, -1.08635004e-5f}; 2979 2980 // Get the integer of the input 2981 XMVECTOR R = XMVectorFloor(V); 2982 // Get the exponent estimate 2983 XMVECTOR E = XMVectorExpEst(R); 2984 // Get the fractional only 2985 R = _mm_sub_ps(V,R); 2986 // Get R^2 2987 XMVECTOR R2 = _mm_mul_ps(R,R); 2988 // And R^3 2989 XMVECTOR R3 = _mm_mul_ps(R,R2); 2990 2991 XMVECTOR V0 = _mm_load_ps1(&C0.f[1]); 2992 V0 = _mm_mul_ps(V0,R); 2993 XMVECTOR vConstants = _mm_load_ps1(&C0.f[0]); 2994 V0 = _mm_add_ps(V0,vConstants); 2995 vConstants = _mm_load_ps1(&C0.f[2]); 2996 vConstants = _mm_mul_ps(vConstants,R2); 2997 V0 = _mm_add_ps(V0,vConstants); 2998 vConstants = _mm_load_ps1(&C0.f[3]); 2999 vConstants = _mm_mul_ps(vConstants,R3); 3000 V0 = _mm_add_ps(V0,vConstants); 3001 3002 XMVECTOR V1 = _mm_load_ps1(&C1.f[1]); 3003 V1 = _mm_mul_ps(V1,R); 3004 vConstants = _mm_load_ps1(&C1.f[0]); 3005 V1 = _mm_add_ps(V1,vConstants); 3006 vConstants = _mm_load_ps1(&C1.f[2]); 3007 vConstants = _mm_mul_ps(vConstants,R2); 3008 V1 = _mm_add_ps(V1,vConstants); 3009 vConstants = _mm_load_ps1(&C1.f[3]); 3010 vConstants = _mm_mul_ps(vConstants,R3); 3011 V1 = _mm_add_ps(V1,vConstants); 3012 // R2 = R^4 3013 R2 = _mm_mul_ps(R2,R2); 3014 R2 = _mm_mul_ps(R2,V1); 3015 R2 = _mm_add_ps(R2,V0); 3016 E = _mm_div_ps(E,R2); 3017 return E; 3018#else // _XM_VMX128_INTRINSICS_ 3019#endif // _XM_VMX128_INTRINSICS_ 3020} 3021 3022//------------------------------------------------------------------------------ 3023 3024XMFINLINE XMVECTOR XMVectorLogEst 3025( 3026 FXMVECTOR V 3027) 3028{ 3029#if defined(_XM_NO_INTRINSICS_) 3030 3031 FLOAT fScale = (1.0f / logf(2.0f)); 3032 XMVECTOR Result; 3033 3034 Result.vector4_f32[0] = logf(V.vector4_f32[0])*fScale; 3035 Result.vector4_f32[1] = logf(V.vector4_f32[1])*fScale; 3036 Result.vector4_f32[2] = logf(V.vector4_f32[2])*fScale; 3037 Result.vector4_f32[3] = logf(V.vector4_f32[3])*fScale; 3038 return Result; 3039 3040#elif defined(_XM_SSE_INTRINSICS_) 3041 XMVECTOR vScale = _mm_set_ps1(1.0f / logf(2.0f)); 3042 XMVECTOR vResult = _mm_setr_ps( 3043 logf(XMVectorGetX(V)), 3044 logf(XMVectorGetY(V)), 3045 logf(XMVectorGetZ(V)), 3046 logf(XMVectorGetW(V))); 3047 vResult = _mm_mul_ps(vResult,vScale); 3048 return vResult; 3049#else // _XM_VMX128_INTRINSICS_ 3050#endif // _XM_VMX128_INTRINSICS_ 3051} 3052 3053//------------------------------------------------------------------------------ 3054 3055XMINLINE XMVECTOR XMVectorLog 3056( 3057 FXMVECTOR V 3058) 3059{ 3060#if defined(_XM_NO_INTRINSICS_) 3061 FLOAT fScale = (1.0f / logf(2.0f)); 3062 XMVECTOR Result; 3063 3064 Result.vector4_f32[0] = logf(V.vector4_f32[0])*fScale; 3065 Result.vector4_f32[1] = logf(V.vector4_f32[1])*fScale; 3066 Result.vector4_f32[2] = logf(V.vector4_f32[2])*fScale; 3067 Result.vector4_f32[3] = logf(V.vector4_f32[3])*fScale; 3068 return Result; 3069 3070#elif defined(_XM_SSE_INTRINSICS_) 3071 XMVECTOR vScale = _mm_set_ps1(1.0f / logf(2.0f)); 3072 XMVECTOR vResult = _mm_setr_ps( 3073 logf(XMVectorGetX(V)), 3074 
logf(XMVectorGetY(V)), 3075 logf(XMVectorGetZ(V)), 3076 logf(XMVectorGetW(V))); 3077 vResult = _mm_mul_ps(vResult,vScale); 3078 return vResult; 3079#else // _XM_VMX128_INTRINSICS_ 3080#endif // _XM_VMX128_INTRINSICS_ 3081} 3082 3083//------------------------------------------------------------------------------ 3084 3085XMFINLINE XMVECTOR XMVectorPowEst 3086( 3087 FXMVECTOR V1, 3088 FXMVECTOR V2 3089) 3090{ 3091#if defined(_XM_NO_INTRINSICS_) 3092 3093 XMVECTOR Result; 3094 3095 Result.vector4_f32[0] = powf(V1.vector4_f32[0], V2.vector4_f32[0]); 3096 Result.vector4_f32[1] = powf(V1.vector4_f32[1], V2.vector4_f32[1]); 3097 Result.vector4_f32[2] = powf(V1.vector4_f32[2], V2.vector4_f32[2]); 3098 Result.vector4_f32[3] = powf(V1.vector4_f32[3], V2.vector4_f32[3]); 3099 3100 return Result; 3101 3102#elif defined(_XM_SSE_INTRINSICS_) 3103 XMVECTOR vResult = _mm_setr_ps( 3104 powf(XMVectorGetX(V1),XMVectorGetX(V2)), 3105 powf(XMVectorGetY(V1),XMVectorGetY(V2)), 3106 powf(XMVectorGetZ(V1),XMVectorGetZ(V2)), 3107 powf(XMVectorGetW(V1),XMVectorGetW(V2))); 3108 return vResult; 3109#else // _XM_VMX128_INTRINSICS_ 3110#endif // _XM_VMX128_INTRINSICS_ 3111} 3112 3113//------------------------------------------------------------------------------ 3114 3115XMFINLINE XMVECTOR XMVectorPow 3116( 3117 FXMVECTOR V1, 3118 FXMVECTOR V2 3119) 3120{ 3121#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) 3122 3123 return XMVectorPowEst(V1, V2); 3124 3125#else // _XM_VMX128_INTRINSICS_ 3126#endif // _XM_VMX128_INTRINSICS_ 3127} 3128 3129//------------------------------------------------------------------------------ 3130 3131XMFINLINE XMVECTOR XMVectorAbs 3132( 3133 FXMVECTOR V 3134) 3135{ 3136#if defined(_XM_NO_INTRINSICS_) 3137 XMVECTOR vResult = { 3138 fabsf(V.vector4_f32[0]), 3139 fabsf(V.vector4_f32[1]), 3140 fabsf(V.vector4_f32[2]), 3141 fabsf(V.vector4_f32[3]) 3142 }; 3143 return vResult; 3144 3145#elif defined(_XM_SSE_INTRINSICS_) 3146 XMVECTOR vResult = _mm_setzero_ps(); 3147 vResult = _mm_sub_ps(vResult,V); 3148 vResult = _mm_max_ps(vResult,V); 3149 return vResult; 3150#else // _XM_VMX128_INTRINSICS_ 3151#endif // _XM_VMX128_INTRINSICS_ 3152} 3153 3154//------------------------------------------------------------------------------ 3155 3156XMFINLINE XMVECTOR XMVectorMod 3157( 3158 FXMVECTOR V1, 3159 FXMVECTOR V2 3160) 3161{ 3162#if defined(_XM_NO_INTRINSICS_) 3163 3164 XMVECTOR Reciprocal; 3165 XMVECTOR Quotient; 3166 XMVECTOR Result; 3167 3168 // V1 % V2 = V1 - V2 * truncate(V1 / V2) 3169 Reciprocal = XMVectorReciprocal(V2); 3170 Quotient = XMVectorMultiply(V1, Reciprocal); 3171 Quotient = XMVectorTruncate(Quotient); 3172 Result = XMVectorNegativeMultiplySubtract(V2, Quotient, V1); 3173 3174 return Result; 3175 3176#elif defined(_XM_SSE_INTRINSICS_) 3177 XMVECTOR vResult = _mm_div_ps(V1, V2); 3178 vResult = XMVectorTruncate(vResult); 3179 vResult = _mm_mul_ps(vResult,V2); 3180 vResult = _mm_sub_ps(V1,vResult); 3181 return vResult; 3182#else // _XM_VMX128_INTRINSICS_ 3183#endif // _XM_VMX128_INTRINSICS_ 3184} 3185 3186//------------------------------------------------------------------------------ 3187 3188XMFINLINE XMVECTOR XMVectorModAngles 3189( 3190 FXMVECTOR Angles 3191) 3192{ 3193#if defined(_XM_NO_INTRINSICS_) 3194 3195 XMVECTOR V; 3196 XMVECTOR Result; 3197 3198 // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI 3199 V = XMVectorMultiply(Angles, g_XMReciprocalTwoPi.v); 3200 V = XMVectorRound(V); 3201 Result = XMVectorNegativeMultiplySubtract(g_XMTwoPi.v, V, 
Angles); 3202 3203 return Result; 3204 3205#elif defined(_XM_SSE_INTRINSICS_) 3206 // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI 3207 XMVECTOR vResult = _mm_mul_ps(Angles,g_XMReciprocalTwoPi); 3208 // Use the inline XMVectorRound because of the complexity of correct rounding 3209 vResult = XMVectorRound(vResult); 3210 vResult = _mm_mul_ps(vResult,g_XMTwoPi); 3211 vResult = _mm_sub_ps(Angles,vResult); 3212 return vResult; 3213#else // _XM_VMX128_INTRINSICS_ 3214#endif // _XM_VMX128_INTRINSICS_ 3215} 3216 3217//------------------------------------------------------------------------------ 3218 3219XMINLINE XMVECTOR XMVectorSin 3220( 3221 FXMVECTOR V 3222) 3223{ 3224 3225#if defined(_XM_NO_INTRINSICS_) 3226 3227 XMVECTOR V1, V2, V3, V5, V7, V9, V11, V13, V15, V17, V19, V21, V23; 3228 XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11; 3229 XMVECTOR Result; 3230 3231 V1 = XMVectorModAngles(V); 3232 3233 // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - 3234 // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI) 3235 V2 = XMVectorMultiply(V1, V1); 3236 V3 = XMVectorMultiply(V2, V1); 3237 V5 = XMVectorMultiply(V3, V2); 3238 V7 = XMVectorMultiply(V5, V2); 3239 V9 = XMVectorMultiply(V7, V2); 3240 V11 = XMVectorMultiply(V9, V2); 3241 V13 = XMVectorMultiply(V11, V2); 3242 V15 = XMVectorMultiply(V13, V2); 3243 V17 = XMVectorMultiply(V15, V2); 3244 V19 = XMVectorMultiply(V17, V2); 3245 V21 = XMVectorMultiply(V19, V2); 3246 V23 = XMVectorMultiply(V21, V2); 3247 3248 S1 = XMVectorSplatY(g_XMSinCoefficients0.v); 3249 S2 = XMVectorSplatZ(g_XMSinCoefficients0.v); 3250 S3 = XMVectorSplatW(g_XMSinCoefficients0.v); 3251 S4 = XMVectorSplatX(g_XMSinCoefficients1.v); 3252 S5 = XMVectorSplatY(g_XMSinCoefficients1.v); 3253 S6 = XMVectorSplatZ(g_XMSinCoefficients1.v); 3254 S7 = XMVectorSplatW(g_XMSinCoefficients1.v); 3255 S8 = XMVectorSplatX(g_XMSinCoefficients2.v); 3256 S9 = XMVectorSplatY(g_XMSinCoefficients2.v); 3257 S10 = XMVectorSplatZ(g_XMSinCoefficients2.v); 3258 S11 = XMVectorSplatW(g_XMSinCoefficients2.v); 3259 3260 Result = XMVectorMultiplyAdd(S1, V3, V1); 3261 Result = XMVectorMultiplyAdd(S2, V5, Result); 3262 Result = XMVectorMultiplyAdd(S3, V7, Result); 3263 Result = XMVectorMultiplyAdd(S4, V9, Result); 3264 Result = XMVectorMultiplyAdd(S5, V11, Result); 3265 Result = XMVectorMultiplyAdd(S6, V13, Result); 3266 Result = XMVectorMultiplyAdd(S7, V15, Result); 3267 Result = XMVectorMultiplyAdd(S8, V17, Result); 3268 Result = XMVectorMultiplyAdd(S9, V19, Result); 3269 Result = XMVectorMultiplyAdd(S10, V21, Result); 3270 Result = XMVectorMultiplyAdd(S11, V23, Result); 3271 3272 return Result; 3273 3274#elif defined(_XM_SSE_INTRINSICS_) 3275 // Force the value within the bounds of pi 3276 XMVECTOR vResult = XMVectorModAngles(V); 3277 // Each step below raises V to the power noted in the comments 3278 // V2 = V1^2 3279 XMVECTOR V2 = _mm_mul_ps(vResult,vResult); 3280 // V1^3 3281 XMVECTOR vPower = _mm_mul_ps(vResult,V2); 3282 XMVECTOR vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[1]); 3283 vConstants = _mm_mul_ps(vConstants,vPower); 3284 vResult = _mm_add_ps(vResult,vConstants); 3285 3286 // V^5 3287 vPower = _mm_mul_ps(vPower,V2); 3288 vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[2]); 3289 vConstants = _mm_mul_ps(vConstants,vPower); 3290 vResult = _mm_add_ps(vResult,vConstants); 3291 3292 // V^7 3293 vPower = _mm_mul_ps(vPower,V2); 3294 vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[3]); 3295 vConstants = _mm_mul_ps(vConstants,vPower); 3296
vResult = _mm_add_ps(vResult,vConstants); 3297 3298 // V^9 3299 vPower = _mm_mul_ps(vPower,V2); 3300 vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[0]); 3301 vConstants = _mm_mul_ps(vConstants,vPower); 3302 vResult = _mm_add_ps(vResult,vConstants); 3303 3304 // V^11 3305 vPower = _mm_mul_ps(vPower,V2); 3306 vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[1]); 3307 vConstants = _mm_mul_ps(vConstants,vPower); 3308 vResult = _mm_add_ps(vResult,vConstants); 3309 3310 // V^13 3311 vPower = _mm_mul_ps(vPower,V2); 3312 vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[2]); 3313 vConstants = _mm_mul_ps(vConstants,vPower); 3314 vResult = _mm_add_ps(vResult,vConstants); 3315 3316 // V^15 3317 vPower = _mm_mul_ps(vPower,V2); 3318 vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[3]); 3319 vConstants = _mm_mul_ps(vConstants,vPower); 3320 vResult = _mm_add_ps(vResult,vConstants); 3321 3322 // V^17 3323 vPower = _mm_mul_ps(vPower,V2); 3324 vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[0]); 3325 vConstants = _mm_mul_ps(vConstants,vPower); 3326 vResult = _mm_add_ps(vResult,vConstants); 3327 3328 // V^19 3329 vPower = _mm_mul_ps(vPower,V2); 3330 vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[1]); 3331 vConstants = _mm_mul_ps(vConstants,vPower); 3332 vResult = _mm_add_ps(vResult,vConstants); 3333 3334 // V^21 3335 vPower = _mm_mul_ps(vPower,V2); 3336 vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[2]); 3337 vConstants = _mm_mul_ps(vConstants,vPower); 3338 vResult = _mm_add_ps(vResult,vConstants); 3339 3340 // V^23 3341 vPower = _mm_mul_ps(vPower,V2); 3342 vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[3]); 3343 vConstants = _mm_mul_ps(vConstants,vPower); 3344 vResult = _mm_add_ps(vResult,vConstants); 3345 return vResult; 3346#else // _XM_VMX128_INTRINSICS_ 3347#endif // _XM_VMX128_INTRINSICS_ 3348} 3349 3350//------------------------------------------------------------------------------ 3351 3352XMINLINE XMVECTOR XMVectorCos 3353( 3354 FXMVECTOR V 3355) 3356{ 3357#if defined(_XM_NO_INTRINSICS_) 3358 3359 XMVECTOR V1, V2, V4, V6, V8, V10, V12, V14, V16, V18, V20, V22; 3360 XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11; 3361 XMVECTOR Result; 3362 3363 V1 = XMVectorModAngles(V); 3364 3365 // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! - 3366 // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! 
(for -PI <= V < PI) 3367 V2 = XMVectorMultiply(V1, V1); 3368 V4 = XMVectorMultiply(V2, V2); 3369 V6 = XMVectorMultiply(V4, V2); 3370 V8 = XMVectorMultiply(V4, V4); 3371 V10 = XMVectorMultiply(V6, V4); 3372 V12 = XMVectorMultiply(V6, V6); 3373 V14 = XMVectorMultiply(V8, V6); 3374 V16 = XMVectorMultiply(V8, V8); 3375 V18 = XMVectorMultiply(V10, V8); 3376 V20 = XMVectorMultiply(V10, V10); 3377 V22 = XMVectorMultiply(V12, V10); 3378 3379 C1 = XMVectorSplatY(g_XMCosCoefficients0.v); 3380 C2 = XMVectorSplatZ(g_XMCosCoefficients0.v); 3381 C3 = XMVectorSplatW(g_XMCosCoefficients0.v); 3382 C4 = XMVectorSplatX(g_XMCosCoefficients1.v); 3383 C5 = XMVectorSplatY(g_XMCosCoefficients1.v); 3384 C6 = XMVectorSplatZ(g_XMCosCoefficients1.v); 3385 C7 = XMVectorSplatW(g_XMCosCoefficients1.v); 3386 C8 = XMVectorSplatX(g_XMCosCoefficients2.v); 3387 C9 = XMVectorSplatY(g_XMCosCoefficients2.v); 3388 C10 = XMVectorSplatZ(g_XMCosCoefficients2.v); 3389 C11 = XMVectorSplatW(g_XMCosCoefficients2.v); 3390 3391 Result = XMVectorMultiplyAdd(C1, V2, g_XMOne.v); 3392 Result = XMVectorMultiplyAdd(C2, V4, Result); 3393 Result = XMVectorMultiplyAdd(C3, V6, Result); 3394 Result = XMVectorMultiplyAdd(C4, V8, Result); 3395 Result = XMVectorMultiplyAdd(C5, V10, Result); 3396 Result = XMVectorMultiplyAdd(C6, V12, Result); 3397 Result = XMVectorMultiplyAdd(C7, V14, Result); 3398 Result = XMVectorMultiplyAdd(C8, V16, Result); 3399 Result = XMVectorMultiplyAdd(C9, V18, Result); 3400 Result = XMVectorMultiplyAdd(C10, V20, Result); 3401 Result = XMVectorMultiplyAdd(C11, V22, Result); 3402 3403 return Result; 3404 3405#elif defined(_XM_SSE_INTRINSICS_) 3406 // Force the value within the bounds of pi 3407 XMVECTOR V2 = XMVectorModAngles(V); 3408 // Each step below raises V to the power noted in the comments 3409 // V2 = V1^2 3410 V2 = _mm_mul_ps(V2,V2); 3411 // V^2 3412 XMVECTOR vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[1]); 3413 vConstants = _mm_mul_ps(vConstants,V2); 3414 XMVECTOR vResult = _mm_add_ps(vConstants,g_XMOne); 3415 3416 // V^4 3417 XMVECTOR vPower = _mm_mul_ps(V2,V2); 3418 vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[2]); 3419 vConstants = _mm_mul_ps(vConstants,vPower); 3420 vResult = _mm_add_ps(vResult,vConstants); 3421 3422 // V^6 3423 vPower = _mm_mul_ps(vPower,V2); 3424 vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[3]); 3425 vConstants = _mm_mul_ps(vConstants,vPower); 3426 vResult = _mm_add_ps(vResult,vConstants); 3427 3428 // V^8 3429 vPower = _mm_mul_ps(vPower,V2); 3430 vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[0]); 3431 vConstants = _mm_mul_ps(vConstants,vPower); 3432 vResult = _mm_add_ps(vResult,vConstants); 3433 3434 // V^10 3435 vPower = _mm_mul_ps(vPower,V2); 3436 vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[1]); 3437 vConstants = _mm_mul_ps(vConstants,vPower); 3438 vResult = _mm_add_ps(vResult,vConstants); 3439 3440 // V^12 3441 vPower = _mm_mul_ps(vPower,V2); 3442 vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[2]); 3443 vConstants = _mm_mul_ps(vConstants,vPower); 3444 vResult = _mm_add_ps(vResult,vConstants); 3445 3446 // V^14 3447 vPower = _mm_mul_ps(vPower,V2); 3448 vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[3]); 3449 vConstants = _mm_mul_ps(vConstants,vPower); 3450 vResult = _mm_add_ps(vResult,vConstants); 3451 3452 // V^16 3453 vPower = _mm_mul_ps(vPower,V2); 3454 vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[0]); 3455 vConstants = _mm_mul_ps(vConstants,vPower); 3456 vResult = _mm_add_ps(vResult,vConstants); 3457 3458 // V^18 3459 vPower = _mm_mul_ps(vPower,V2); 3460 vConstants =
_mm_load_ps1(&g_XMCosCoefficients2.f[1]); 3461 vConstants = _mm_mul_ps(vConstants,vPower); 3462 vResult = _mm_add_ps(vResult,vConstants); 3463 3464 // V^20 3465 vPower = _mm_mul_ps(vPower,V2); 3466 vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[2]); 3467 vConstants = _mm_mul_ps(vConstants,vPower); 3468 vResult = _mm_add_ps(vResult,vConstants); 3469 3470 // V^22 3471 vPower = _mm_mul_ps(vPower,V2); 3472 vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[3]); 3473 vConstants = _mm_mul_ps(vConstants,vPower); 3474 vResult = _mm_add_ps(vResult,vConstants); 3475 return vResult; 3476#else // _XM_VMX128_INTRINSICS_ 3477#endif // _XM_VMX128_INTRINSICS_ 3478} 3479 3480//------------------------------------------------------------------------------ 3481 3482XMINLINE VOID XMVectorSinCos 3483( 3484 XMVECTOR* pSin, 3485 XMVECTOR* pCos, 3486 FXMVECTOR V 3487) 3488{ 3489#if defined(_XM_NO_INTRINSICS_) 3490 3491 XMVECTOR V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13; 3492 XMVECTOR V14, V15, V16, V17, V18, V19, V20, V21, V22, V23; 3493 XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11; 3494 XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11; 3495 XMVECTOR Sin, Cos; 3496 3497 XMASSERT(pSin); 3498 XMASSERT(pCos); 3499 3500 V1 = XMVectorModAngles(V); 3501 3502 // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - 3503 // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI) 3504 // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! - 3505 // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI) 3506 3507 V2 = XMVectorMultiply(V1, V1); 3508 V3 = XMVectorMultiply(V2, V1); 3509 V4 = XMVectorMultiply(V2, V2); 3510 V5 = XMVectorMultiply(V3, V2); 3511 V6 = XMVectorMultiply(V3, V3); 3512 V7 = XMVectorMultiply(V4, V3); 3513 V8 = XMVectorMultiply(V4, V4); 3514 V9 = XMVectorMultiply(V5, V4); 3515 V10 = XMVectorMultiply(V5, V5); 3516 V11 = XMVectorMultiply(V6, V5); 3517 V12 = XMVectorMultiply(V6, V6); 3518 V13 = XMVectorMultiply(V7, V6); 3519 V14 = XMVectorMultiply(V7, V7); 3520 V15 = XMVectorMultiply(V8, V7); 3521 V16 = XMVectorMultiply(V8, V8); 3522 V17 = XMVectorMultiply(V9, V8); 3523 V18 = XMVectorMultiply(V9, V9); 3524 V19 = XMVectorMultiply(V10, V9); 3525 V20 = XMVectorMultiply(V10, V10); 3526 V21 = XMVectorMultiply(V11, V10); 3527 V22 = XMVectorMultiply(V11, V11); 3528 V23 = XMVectorMultiply(V12, V11); 3529 3530 S1 = XMVectorSplatY(g_XMSinCoefficients0.v); 3531 S2 = XMVectorSplatZ(g_XMSinCoefficients0.v); 3532 S3 = XMVectorSplatW(g_XMSinCoefficients0.v); 3533 S4 = XMVectorSplatX(g_XMSinCoefficients1.v); 3534 S5 = XMVectorSplatY(g_XMSinCoefficients1.v); 3535 S6 = XMVectorSplatZ(g_XMSinCoefficients1.v); 3536 S7 = XMVectorSplatW(g_XMSinCoefficients1.v); 3537 S8 = XMVectorSplatX(g_XMSinCoefficients2.v); 3538 S9 = XMVectorSplatY(g_XMSinCoefficients2.v); 3539 S10 = XMVectorSplatZ(g_XMSinCoefficients2.v); 3540 S11 = XMVectorSplatW(g_XMSinCoefficients2.v); 3541 3542 C1 = XMVectorSplatY(g_XMCosCoefficients0.v); 3543 C2 = XMVectorSplatZ(g_XMCosCoefficients0.v); 3544 C3 = XMVectorSplatW(g_XMCosCoefficients0.v); 3545 C4 = XMVectorSplatX(g_XMCosCoefficients1.v); 3546 C5 = XMVectorSplatY(g_XMCosCoefficients1.v); 3547 C6 = XMVectorSplatZ(g_XMCosCoefficients1.v); 3548 C7 = XMVectorSplatW(g_XMCosCoefficients1.v); 3549 C8 = XMVectorSplatX(g_XMCosCoefficients2.v); 3550 C9 = XMVectorSplatY(g_XMCosCoefficients2.v); 3551 C10 = XMVectorSplatZ(g_XMCosCoefficients2.v); 3552 C11 = 
XMVectorSplatW(g_XMCosCoefficients2.v); 3553 3554 Sin = XMVectorMultiplyAdd(S1, V3, V1); 3555 Sin = XMVectorMultiplyAdd(S2, V5, Sin); 3556 Sin = XMVectorMultiplyAdd(S3, V7, Sin); 3557 Sin = XMVectorMultiplyAdd(S4, V9, Sin); 3558 Sin = XMVectorMultiplyAdd(S5, V11, Sin); 3559 Sin = XMVectorMultiplyAdd(S6, V13, Sin); 3560 Sin = XMVectorMultiplyAdd(S7, V15, Sin); 3561 Sin = XMVectorMultiplyAdd(S8, V17, Sin); 3562 Sin = XMVectorMultiplyAdd(S9, V19, Sin); 3563 Sin = XMVectorMultiplyAdd(S10, V21, Sin); 3564 Sin = XMVectorMultiplyAdd(S11, V23, Sin); 3565 3566 Cos = XMVectorMultiplyAdd(C1, V2, g_XMOne.v); 3567 Cos = XMVectorMultiplyAdd(C2, V4, Cos); 3568 Cos = XMVectorMultiplyAdd(C3, V6, Cos); 3569 Cos = XMVectorMultiplyAdd(C4, V8, Cos); 3570 Cos = XMVectorMultiplyAdd(C5, V10, Cos); 3571 Cos = XMVectorMultiplyAdd(C6, V12, Cos); 3572 Cos = XMVectorMultiplyAdd(C7, V14, Cos); 3573 Cos = XMVectorMultiplyAdd(C8, V16, Cos); 3574 Cos = XMVectorMultiplyAdd(C9, V18, Cos); 3575 Cos = XMVectorMultiplyAdd(C10, V20, Cos); 3576 Cos = XMVectorMultiplyAdd(C11, V22, Cos); 3577 3578 *pSin = Sin; 3579 *pCos = Cos; 3580 3581#elif defined(_XM_SSE_INTRINSICS_) 3582 XMASSERT(pSin); 3583 XMASSERT(pCos); 3584 XMVECTOR V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13; 3585 XMVECTOR V14, V15, V16, V17, V18, V19, V20, V21, V22, V23; 3586 XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11; 3587 XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11; 3588 XMVECTOR Sin, Cos; 3589 3590 V1 = XMVectorModAngles(V); 3591 3592 // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - 3593 // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI) 3594 // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! - 3595 // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! 
(for -PI <= V < PI) 3596 3597 V2 = XMVectorMultiply(V1, V1); 3598 V3 = XMVectorMultiply(V2, V1); 3599 V4 = XMVectorMultiply(V2, V2); 3600 V5 = XMVectorMultiply(V3, V2); 3601 V6 = XMVectorMultiply(V3, V3); 3602 V7 = XMVectorMultiply(V4, V3); 3603 V8 = XMVectorMultiply(V4, V4); 3604 V9 = XMVectorMultiply(V5, V4); 3605 V10 = XMVectorMultiply(V5, V5); 3606 V11 = XMVectorMultiply(V6, V5); 3607 V12 = XMVectorMultiply(V6, V6); 3608 V13 = XMVectorMultiply(V7, V6); 3609 V14 = XMVectorMultiply(V7, V7); 3610 V15 = XMVectorMultiply(V8, V7); 3611 V16 = XMVectorMultiply(V8, V8); 3612 V17 = XMVectorMultiply(V9, V8); 3613 V18 = XMVectorMultiply(V9, V9); 3614 V19 = XMVectorMultiply(V10, V9); 3615 V20 = XMVectorMultiply(V10, V10); 3616 V21 = XMVectorMultiply(V11, V10); 3617 V22 = XMVectorMultiply(V11, V11); 3618 V23 = XMVectorMultiply(V12, V11); 3619 3620 S1 = _mm_load_ps1(&g_XMSinCoefficients0.f[1]); 3621 S2 = _mm_load_ps1(&g_XMSinCoefficients0.f[2]); 3622 S3 = _mm_load_ps1(&g_XMSinCoefficients0.f[3]); 3623 S4 = _mm_load_ps1(&g_XMSinCoefficients1.f[0]); 3624 S5 = _mm_load_ps1(&g_XMSinCoefficients1.f[1]); 3625 S6 = _mm_load_ps1(&g_XMSinCoefficients1.f[2]); 3626 S7 = _mm_load_ps1(&g_XMSinCoefficients1.f[3]); 3627 S8 = _mm_load_ps1(&g_XMSinCoefficients2.f[0]); 3628 S9 = _mm_load_ps1(&g_XMSinCoefficients2.f[1]); 3629 S10 = _mm_load_ps1(&g_XMSinCoefficients2.f[2]); 3630 S11 = _mm_load_ps1(&g_XMSinCoefficients2.f[3]); 3631 3632 C1 = _mm_load_ps1(&g_XMCosCoefficients0.f[1]); 3633 C2 = _mm_load_ps1(&g_XMCosCoefficients0.f[2]); 3634 C3 = _mm_load_ps1(&g_XMCosCoefficients0.f[3]); 3635 C4 = _mm_load_ps1(&g_XMCosCoefficients1.f[0]); 3636 C5 = _mm_load_ps1(&g_XMCosCoefficients1.f[1]); 3637 C6 = _mm_load_ps1(&g_XMCosCoefficients1.f[2]); 3638 C7 = _mm_load_ps1(&g_XMCosCoefficients1.f[3]); 3639 C8 = _mm_load_ps1(&g_XMCosCoefficients2.f[0]); 3640 C9 = _mm_load_ps1(&g_XMCosCoefficients2.f[1]); 3641 C10 = _mm_load_ps1(&g_XMCosCoefficients2.f[2]); 3642 C11 = _mm_load_ps1(&g_XMCosCoefficients2.f[3]); 3643 3644 S1 = _mm_mul_ps(S1,V3); 3645 Sin = _mm_add_ps(S1,V1); 3646 Sin = XMVectorMultiplyAdd(S2, V5, Sin); 3647 Sin = XMVectorMultiplyAdd(S3, V7, Sin); 3648 Sin = XMVectorMultiplyAdd(S4, V9, Sin); 3649 Sin = XMVectorMultiplyAdd(S5, V11, Sin); 3650 Sin = XMVectorMultiplyAdd(S6, V13, Sin); 3651 Sin = XMVectorMultiplyAdd(S7, V15, Sin); 3652 Sin = XMVectorMultiplyAdd(S8, V17, Sin); 3653 Sin = XMVectorMultiplyAdd(S9, V19, Sin); 3654 Sin = XMVectorMultiplyAdd(S10, V21, Sin); 3655 Sin = XMVectorMultiplyAdd(S11, V23, Sin); 3656 3657 Cos = _mm_mul_ps(C1,V2); 3658 Cos = _mm_add_ps(Cos,g_XMOne); 3659 Cos = XMVectorMultiplyAdd(C2, V4, Cos); 3660 Cos = XMVectorMultiplyAdd(C3, V6, Cos); 3661 Cos = XMVectorMultiplyAdd(C4, V8, Cos); 3662 Cos = XMVectorMultiplyAdd(C5, V10, Cos); 3663 Cos = XMVectorMultiplyAdd(C6, V12, Cos); 3664 Cos = XMVectorMultiplyAdd(C7, V14, Cos); 3665 Cos = XMVectorMultiplyAdd(C8, V16, Cos); 3666 Cos = XMVectorMultiplyAdd(C9, V18, Cos); 3667 Cos = XMVectorMultiplyAdd(C10, V20, Cos); 3668 Cos = XMVectorMultiplyAdd(C11, V22, Cos); 3669 3670 *pSin = Sin; 3671 *pCos = Cos; 3672#else // _XM_VMX128_INTRINSICS_ 3673#endif // _XM_VMX128_INTRINSICS_ 3674} 3675 3676//------------------------------------------------------------------------------ 3677 3678XMINLINE XMVECTOR XMVectorTan 3679( 3680 FXMVECTOR V 3681) 3682{ 3683#if defined(_XM_NO_INTRINSICS_) 3684 3685 // Cody and Waite algorithm to compute tangent. 
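// (Explanatory sketch of the reduction below, stated in terms of the
// constants defined in this function.) VA = round(V * 2/Pi) is the nearest
// integer multiple of Pi/2, and the remainder VC = V - VA * Pi/2 is formed in
// two steps, with Pi/2 split into a large part (C0 = 1.570796371) and a small
// correction (C1 = 6.077100628e-11), so that cancellation for large inputs
// costs less precision. VB keeps the integer multiple; its low bit chooses
// between the tan() form (even multiples) and the -cot() form (odd multiples)
// of the rational approximation N/D, while Epsilon guards remainders near
// zero, where the ratio would otherwise lose accuracy.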
//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorSinH
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = XMVectorMultiplyAdd(V, Scale.v, g_XMNegativeOne.v);
    V2 = XMVectorNegativeMultiplySubtract(V, Scale.v, g_XMNegativeOne.v);

    E1 = XMVectorExp(V1);
    E2 = XMVectorExp(V2);

    Result = XMVectorSubtract(E1, E2);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = _mm_mul_ps(V, Scale);
    V1 = _mm_add_ps(V1,g_XMNegativeOne);
    V2 = _mm_mul_ps(V, Scale);
    V2 = _mm_sub_ps(g_XMNegativeOne,V2);
    E1 = XMVectorExp(V1);
    E2 = XMVectorExp(V2);

    Result = _mm_sub_ps(E1, E2);

    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
3767 3768 XMVECTOR VA, VB, VC, VC2; 3769 XMVECTOR T0, T1, T2, T3, T4, T5, T6, T7; 3770 XMVECTOR C0, C1, TwoDivPi, Epsilon; 3771 XMVECTOR N, D; 3772 XMVECTOR R0, R1; 3773 XMVECTOR VIsZero, VCNearZero, VBIsEven; 3774 XMVECTOR Zero; 3775 XMVECTOR Result; 3776 static CONST XMVECTORF32 TanCoefficients0 = {1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f}; 3777 static CONST XMVECTORF32 TanCoefficients1 = {4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f}; 3778 static CONST XMVECTORF32 TanConstants = {1.570796371f, 6.077100628e-11f, 0.000244140625f, 2.0f / XM_PI}; 3779 static CONST XMVECTORI32 Mask = {0x1, 0x1, 0x1, 0x1}; 3780 3781 TwoDivPi = XMVectorSplatW(TanConstants); 3782 3783 Zero = XMVectorZero(); 3784 3785 C0 = XMVectorSplatX(TanConstants); 3786 C1 = XMVectorSplatY(TanConstants); 3787 Epsilon = XMVectorSplatZ(TanConstants); 3788 3789 VA = XMVectorMultiply(V, TwoDivPi); 3790 3791 VA = XMVectorRound(VA); 3792 3793 VC = XMVectorNegativeMultiplySubtract(VA, C0, V); 3794 3795 VB = XMVectorAbs(VA); 3796 3797 VC = XMVectorNegativeMultiplySubtract(VA, C1, VC); 3798 3799 reinterpret_cast<__m128i *>(&VB)[0] = _mm_cvttps_epi32(VB); 3800 3801 VC2 = XMVectorMultiply(VC, VC); 3802 3803 T7 = XMVectorSplatW(TanCoefficients1); 3804 T6 = XMVectorSplatZ(TanCoefficients1); 3805 T4 = XMVectorSplatX(TanCoefficients1); 3806 T3 = XMVectorSplatW(TanCoefficients0); 3807 T5 = XMVectorSplatY(TanCoefficients1); 3808 T2 = XMVectorSplatZ(TanCoefficients0); 3809 T1 = XMVectorSplatY(TanCoefficients0); 3810 T0 = XMVectorSplatX(TanCoefficients0); 3811 3812 VBIsEven = XMVectorAndInt(VB,Mask); 3813 VBIsEven = XMVectorEqualInt(VBIsEven, Zero); 3814 3815 N = XMVectorMultiplyAdd(VC2, T7, T6); 3816 D = XMVectorMultiplyAdd(VC2, T4, T3); 3817 N = XMVectorMultiplyAdd(VC2, N, T5); 3818 D = XMVectorMultiplyAdd(VC2, D, T2); 3819 N = XMVectorMultiply(VC2, N); 3820 D = XMVectorMultiplyAdd(VC2, D, T1); 3821 N = XMVectorMultiplyAdd(VC, N, VC); 3822 VCNearZero = XMVectorInBounds(VC, Epsilon); 3823 D = XMVectorMultiplyAdd(VC2, D, T0); 3824 3825 N = XMVectorSelect(N, VC, VCNearZero); 3826 D = XMVectorSelect(D, g_XMOne, VCNearZero); 3827 R0 = XMVectorNegate(N); 3828 R1 = _mm_div_ps(N,D); 3829 R0 = _mm_div_ps(D,R0); 3830 VIsZero = XMVectorEqual(V, Zero); 3831 Result = XMVectorSelect(R0, R1, VBIsEven); 3832 Result = XMVectorSelect(Result, Zero, VIsZero); 3833 3834 return Result; 3835 3836#else // _XM_VMX128_INTRINSICS_ 3837#endif // _XM_VMX128_INTRINSICS_ 3838} 3839 3840//------------------------------------------------------------------------------ 3841 3842XMINLINE XMVECTOR XMVectorSinH 3843( 3844 FXMVECTOR V 3845) 3846{ 3847#if defined(_XM_NO_INTRINSICS_) 3848 3849 XMVECTOR V1, V2; 3850 XMVECTOR E1, E2; 3851 XMVECTOR Result; 3852 static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f) 3853 3854 V1 = XMVectorMultiplyAdd(V, Scale.v, g_XMNegativeOne.v); 3855 V2 = XMVectorNegativeMultiplySubtract(V, Scale.v, g_XMNegativeOne.v); 3856 3857 E1 = XMVectorExp(V1); 3858 E2 = XMVectorExp(V2); 3859 3860 Result = XMVectorSubtract(E1, E2); 3861 3862 return Result; 3863 3864#elif defined(_XM_SSE_INTRINSICS_) 3865 XMVECTOR V1, V2; 3866 XMVECTOR E1, E2; 3867 XMVECTOR Result; 3868 static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f) 3869 3870 V1 = _mm_mul_ps(V, Scale); 3871 V1 = _mm_add_ps(V1,g_XMNegativeOne); 3872 V2 = _mm_mul_ps(V, Scale); 3873 V2 = 
//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorASin
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V2, V3, AbsV;
    XMVECTOR C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
    XMVECTOR R0, R1, R2, R3, R4;
    XMVECTOR OneMinusAbsV;
    XMVECTOR Rsq;
    XMVECTOR Result;
    static CONST XMVECTOR OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};

    // asin(V) = V * (C0 + C1 * V + C2 * V^2 + C3 * V^3 + C4 * V^4 + C5 * V^5) + (1 - V) * rsq(1 - V) *
    //           V * (C6 + C7 * V + C8 * V^2 + C9 * V^3 + C10 * V^4 + C11 * V^5)

    AbsV = XMVectorAbs(V);

    V2 = XMVectorMultiply(V, V);
    V3 = XMVectorMultiply(V2, AbsV);

    R4 = XMVectorNegativeMultiplySubtract(AbsV, V, V);

    OneMinusAbsV = XMVectorSubtract(OnePlusEpsilon, AbsV);
    Rsq = XMVectorReciprocalSqrt(OneMinusAbsV);

    C0 = XMVectorSplatX(g_XMASinCoefficients0.v);
    C1 = XMVectorSplatY(g_XMASinCoefficients0.v);
    C2 = XMVectorSplatZ(g_XMASinCoefficients0.v);
    C3 = XMVectorSplatW(g_XMASinCoefficients0.v);

    C4 = XMVectorSplatX(g_XMASinCoefficients1.v);
    C5 = XMVectorSplatY(g_XMASinCoefficients1.v);
    C6 = XMVectorSplatZ(g_XMASinCoefficients1.v);
    C7 = XMVectorSplatW(g_XMASinCoefficients1.v);

    C8 = XMVectorSplatX(g_XMASinCoefficients2.v);
    C9 = XMVectorSplatY(g_XMASinCoefficients2.v);
    C10 = XMVectorSplatZ(g_XMASinCoefficients2.v);
    C11 = XMVectorSplatW(g_XMASinCoefficients2.v);

    R0 = XMVectorMultiplyAdd(C3, AbsV, C7);
    R1 = XMVectorMultiplyAdd(C1, AbsV, C5);
    R2 = XMVectorMultiplyAdd(C2, AbsV, C6);
    R3 = XMVectorMultiplyAdd(C0, AbsV, C4);

    R0 = XMVectorMultiplyAdd(R0, AbsV, C11);
    R1 = XMVectorMultiplyAdd(R1, AbsV, C9);
    R2 = XMVectorMultiplyAdd(R2, AbsV, C10);
    R3 = XMVectorMultiplyAdd(R3, AbsV, C8);

    R0 = XMVectorMultiplyAdd(R2, V3, R0);
    R1 = XMVectorMultiplyAdd(R3, V3, R1);

    R0 = XMVectorMultiply(V, R0);
    R1 = XMVectorMultiply(R4, R1);

    Result = XMVectorMultiplyAdd(R1, Rsq, R0);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};

    // asin(V) = V * (C0 + C1 * V + C2 * V^2 + C3 * V^3 + C4 * V^4 + C5 * V^5) + (1 - V) * rsq(1 - V) *
    //           V * (C6 + C7 * V + C8 * V^2 + C9 * V^3 + C10 * V^4 + C11 * V^5)
    // Get abs(V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);

    XMVECTOR R0 = vAbsV;
    XMVECTOR vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[3]);
    R0 = _mm_mul_ps(R0,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[3]);
    R0 = _mm_add_ps(R0,vConstants);

    XMVECTOR R1 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[1]);
    R1 = _mm_mul_ps(R1,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[1]);
    R1 = _mm_add_ps(R1, vConstants);

    XMVECTOR R2 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[2]);
    R2 = _mm_mul_ps(R2,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[2]);
    R2 = _mm_add_ps(R2, vConstants);

    XMVECTOR R3 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[0]);
    R3 = _mm_mul_ps(R3,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[0]);
    R3 = _mm_add_ps(R3, vConstants);

    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[3]);
    R0 = _mm_mul_ps(R0,vAbsV);
    R0 = _mm_add_ps(R0,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[1]);
    R1 = _mm_mul_ps(R1,vAbsV);
    R1 = _mm_add_ps(R1,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[2]);
    R2 = _mm_mul_ps(R2,vAbsV);
    R2 = _mm_add_ps(R2,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[0]);
    R3 = _mm_mul_ps(R3,vAbsV);
    R3 = _mm_add_ps(R3,vConstants);

    // V3 = V^3
    vConstants = _mm_mul_ps(V,V);
    vConstants = _mm_mul_ps(vConstants, vAbsV);
    // Mul by V^3
    R2 = _mm_mul_ps(R2,vConstants);
    R3 = _mm_mul_ps(R3,vConstants);
    // Merge the results
    R0 = _mm_add_ps(R0,R2);
    R1 = _mm_add_ps(R1,R3);

    R0 = _mm_mul_ps(R0,V);
    // vConstants = V - (V^2 retaining sign)
    vConstants = _mm_mul_ps(vAbsV, V);
    vConstants = _mm_sub_ps(V,vConstants);
    R1 = _mm_mul_ps(R1,vConstants);
    vConstants = _mm_sub_ps(OnePlusEpsilon,vAbsV);
    // Do NOT use rsqrt/mul. This needs the precision
    vConstants = _mm_sqrt_ps(vConstants);
    R1 = _mm_div_ps(R1,vConstants);
    R0 = _mm_add_ps(R0,R1);
    return R0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
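//------------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the original library; the
// helper name is hypothetical. OnePlusEpsilon pads the 1 - |V| radicand so
// that inputs of exactly +/-1 stay inside the square root's domain and
// return +/-Pi/2 rather than dividing by zero.
XMFINLINE XMVECTOR XMVectorASinSpotCheck()
{
    // asin of { -1, -0.5, 0.5, 1 } should come back ~{ -Pi/2, -Pi/6, Pi/6, Pi/2 }
    return XMVectorASin(XMVectorSet(-1.0f, -0.5f, 0.5f, 1.0f));
}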
//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorACos
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V2, V3, AbsV;
    XMVECTOR C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
    XMVECTOR R0, R1, R2, R3, R4;
    XMVECTOR OneMinusAbsV;
    XMVECTOR Rsq;
    XMVECTOR Result;
    static CONST XMVECTOR OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};

    // acos(V) = PI / 2 - asin(V)

    AbsV = XMVectorAbs(V);

    V2 = XMVectorMultiply(V, V);
    V3 = XMVectorMultiply(V2, AbsV);

    R4 = XMVectorNegativeMultiplySubtract(AbsV, V, V);

    OneMinusAbsV = XMVectorSubtract(OnePlusEpsilon, AbsV);
    Rsq = XMVectorReciprocalSqrt(OneMinusAbsV);

    C0 = XMVectorSplatX(g_XMASinCoefficients0.v);
    C1 = XMVectorSplatY(g_XMASinCoefficients0.v);
    C2 = XMVectorSplatZ(g_XMASinCoefficients0.v);
    C3 = XMVectorSplatW(g_XMASinCoefficients0.v);

    C4 = XMVectorSplatX(g_XMASinCoefficients1.v);
    C5 = XMVectorSplatY(g_XMASinCoefficients1.v);
    C6 = XMVectorSplatZ(g_XMASinCoefficients1.v);
    C7 = XMVectorSplatW(g_XMASinCoefficients1.v);

    C8 = XMVectorSplatX(g_XMASinCoefficients2.v);
    C9 = XMVectorSplatY(g_XMASinCoefficients2.v);
    C10 = XMVectorSplatZ(g_XMASinCoefficients2.v);
    C11 = XMVectorSplatW(g_XMASinCoefficients2.v);

    R0 = XMVectorMultiplyAdd(C3, AbsV, C7);
    R1 = XMVectorMultiplyAdd(C1, AbsV, C5);
    R2 = XMVectorMultiplyAdd(C2, AbsV, C6);
    R3 = XMVectorMultiplyAdd(C0, AbsV, C4);

    R0 = XMVectorMultiplyAdd(R0, AbsV, C11);
    R1 = XMVectorMultiplyAdd(R1, AbsV, C9);
    R2 = XMVectorMultiplyAdd(R2, AbsV, C10);
    R3 = XMVectorMultiplyAdd(R3, AbsV, C8);

    R0 = XMVectorMultiplyAdd(R2, V3, R0);
    R1 = XMVectorMultiplyAdd(R3, V3, R1);

    R0 = XMVectorMultiply(V, R0);
    R1 = XMVectorMultiply(R4, R1);

    Result = XMVectorMultiplyAdd(R1, Rsq, R0);

    Result = XMVectorSubtract(g_XMHalfPi.v, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
    // Uses only 6 registers for good code on x86 targets
    // acos(V) = PI / 2 - asin(V)
    // Get abs(V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);
    // Perform the series in precision groups to
    // retain precision across 20 bits. (3 bits of imprecision due to operations)
    XMVECTOR R0 = vAbsV;
    XMVECTOR vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[3]);
    R0 = _mm_mul_ps(R0,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[3]);
    R0 = _mm_add_ps(R0,vConstants);
    R0 = _mm_mul_ps(R0,vAbsV);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[3]);
    R0 = _mm_add_ps(R0,vConstants);

    XMVECTOR R1 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[1]);
    R1 = _mm_mul_ps(R1,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[1]);
    R1 = _mm_add_ps(R1,vConstants);
    R1 = _mm_mul_ps(R1, vAbsV);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[1]);
    R1 = _mm_add_ps(R1,vConstants);

    XMVECTOR R2 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[2]);
    R2 = _mm_mul_ps(R2,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[2]);
    R2 = _mm_add_ps(R2,vConstants);
    R2 = _mm_mul_ps(R2, vAbsV);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[2]);
    R2 = _mm_add_ps(R2,vConstants);

    XMVECTOR R3 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[0]);
    R3 = _mm_mul_ps(R3,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[0]);
    R3 = _mm_add_ps(R3,vConstants);
    R3 = _mm_mul_ps(R3, vAbsV);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[0]);
    R3 = _mm_add_ps(R3,vConstants);

    // vConstants = V^3
    vConstants = _mm_mul_ps(V,V);
    vConstants = _mm_mul_ps(vConstants,vAbsV);
    R2 = _mm_mul_ps(R2,vConstants);
    R3 = _mm_mul_ps(R3,vConstants);
    // Add the pair of values together here to retain
    // as much precision as possible
    R0 = _mm_add_ps(R0,R2);
    R1 = _mm_add_ps(R1,R3);

    R0 = _mm_mul_ps(R0,V);
    // vConstants = V-(V*abs(V))
    vConstants = _mm_mul_ps(V,vAbsV);
    vConstants = _mm_sub_ps(V,vConstants);
    R1 = _mm_mul_ps(R1,vConstants);
    // The epsilon exists to allow 1.0 as an answer
    vConstants = _mm_sub_ps(OnePlusEpsilon, vAbsV);
    // Use sqrt instead of rsqrt for precision
    vConstants = _mm_sqrt_ps(vConstants);
    R1 = _mm_div_ps(R1,vConstants);
    R1 = _mm_add_ps(R1,R0);
    vConstants = _mm_sub_ps(g_XMHalfPi,R1);
    return vConstants;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorATan
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Cody and Waite algorithm to compute inverse tangent.

    XMVECTOR N, D;
    XMVECTOR VF, G, ReciprocalF, AbsF, FA, FB;
    XMVECTOR Sqrt3, Sqrt3MinusOne, TwoMinusSqrt3;
    XMVECTOR HalfPi, OneThirdPi, OneSixthPi, Epsilon, MinV, MaxV;
    XMVECTOR Zero;
    XMVECTOR NegativeHalfPi;
    XMVECTOR Angle1, Angle2;
    XMVECTOR F_GT_One, F_GT_TwoMinusSqrt3, AbsF_LT_Epsilon, V_LT_Zero, V_GT_MaxV, V_LT_MinV;
    XMVECTOR NegativeResult, Result;
    XMVECTOR P0, P1, P2, P3, Q0, Q1, Q2, Q3;
    static CONST XMVECTOR ATanConstants0 = {-1.3688768894e+1f, -2.0505855195e+1f, -8.4946240351f, -8.3758299368e-1f};
    static CONST XMVECTOR ATanConstants1 = {4.1066306682e+1f, 8.6157349597e+1f, 5.9578436142e+1f, 1.5024001160e+1f};
    static CONST XMVECTOR ATanConstants2 = {1.732050808f, 7.320508076e-1f, 2.679491924e-1f, 0.000244140625f}; // <sqrt(3), sqrt(3) - 1, 2 - sqrt(3), Epsilon>
    static CONST XMVECTOR ATanConstants3 = {XM_PIDIV2, XM_PI / 3.0f, XM_PI / 6.0f, 8.507059173e+37f}; // <Pi / 2, Pi / 3, Pi / 6, MaxV>

    Zero = XMVectorZero();

    P0 = XMVectorSplatX(ATanConstants0);
    P1 = XMVectorSplatY(ATanConstants0);
    P2 = XMVectorSplatZ(ATanConstants0);
    P3 = XMVectorSplatW(ATanConstants0);

    Q0 = XMVectorSplatX(ATanConstants1);
    Q1 = XMVectorSplatY(ATanConstants1);
    Q2 = XMVectorSplatZ(ATanConstants1);
    Q3 = XMVectorSplatW(ATanConstants1);

    Sqrt3 = XMVectorSplatX(ATanConstants2);
    Sqrt3MinusOne = XMVectorSplatY(ATanConstants2);
    TwoMinusSqrt3 = XMVectorSplatZ(ATanConstants2);
    Epsilon = XMVectorSplatW(ATanConstants2);

    HalfPi = XMVectorSplatX(ATanConstants3);
    OneThirdPi = XMVectorSplatY(ATanConstants3);
    OneSixthPi = XMVectorSplatZ(ATanConstants3);
    MaxV = XMVectorSplatW(ATanConstants3);

    VF = XMVectorAbs(V);
    ReciprocalF = XMVectorReciprocal(VF);

    F_GT_One = XMVectorGreater(VF, g_XMOne.v);

    VF = XMVectorSelect(VF, ReciprocalF, F_GT_One);
    Angle1 = XMVectorSelect(Zero, HalfPi, F_GT_One);
    Angle2 = XMVectorSelect(OneSixthPi, OneThirdPi, F_GT_One);

    F_GT_TwoMinusSqrt3 = XMVectorGreater(VF, TwoMinusSqrt3);

    FA = XMVectorMultiplyAdd(Sqrt3MinusOne, VF, VF);
    FA = XMVectorAdd(FA, g_XMNegativeOne.v);
    FB = XMVectorAdd(VF, Sqrt3);
    FB = XMVectorReciprocal(FB);
    FA = XMVectorMultiply(FA, FB);

    VF = XMVectorSelect(VF, FA, F_GT_TwoMinusSqrt3);
    Angle1 = XMVectorSelect(Angle1, Angle2, F_GT_TwoMinusSqrt3);

    AbsF = XMVectorAbs(VF);
    AbsF_LT_Epsilon = XMVectorLess(AbsF, Epsilon);

    G = XMVectorMultiply(VF, VF);

    D = XMVectorAdd(G, Q3);
    D = XMVectorMultiplyAdd(D, G, Q2);
    D = XMVectorMultiplyAdd(D, G, Q1);
    D = XMVectorMultiplyAdd(D, G, Q0);
    D = XMVectorReciprocal(D);

    N = XMVectorMultiplyAdd(P3, G, P2);
    N = XMVectorMultiplyAdd(N, G, P1);
    N = XMVectorMultiplyAdd(N, G, P0);
    N = XMVectorMultiply(N, G);
    Result = XMVectorMultiply(N, D);

    Result = XMVectorMultiplyAdd(Result, VF, VF);

    Result = XMVectorSelect(Result, VF, AbsF_LT_Epsilon);

    NegativeResult = XMVectorNegate(Result);
    Result = XMVectorSelect(Result, NegativeResult, F_GT_One);

    Result = XMVectorAdd(Result, Angle1);

    V_LT_Zero = XMVectorLess(V, Zero);
    NegativeResult = XMVectorNegate(Result);
    Result = XMVectorSelect(Result, NegativeResult, V_LT_Zero);

    MinV = XMVectorNegate(MaxV);
    NegativeHalfPi = XMVectorNegate(HalfPi);
    V_GT_MaxV = XMVectorGreater(V, MaxV);
    V_LT_MinV = XMVectorLess(V, MinV);
    Result = XMVectorSelect(Result, g_XMHalfPi.v, V_GT_MaxV);
    Result = XMVectorSelect(Result, NegativeHalfPi, V_LT_MinV);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 ATanConstants0 = {-1.3688768894e+1f, -2.0505855195e+1f, -8.4946240351f, -8.3758299368e-1f};
    static CONST XMVECTORF32 ATanConstants1 = {4.1066306682e+1f, 8.6157349597e+1f, 5.9578436142e+1f, 1.5024001160e+1f};
    static CONST XMVECTORF32 ATanConstants2 = {1.732050808f, 7.320508076e-1f, 2.679491924e-1f, 0.000244140625f}; // <sqrt(3), sqrt(3) - 1, 2 - sqrt(3), Epsilon>
    static CONST XMVECTORF32 ATanConstants3 = {XM_PIDIV2, XM_PI / 3.0f, XM_PI / 6.0f, 8.507059173e+37f}; // <Pi / 2, Pi / 3, Pi / 6, MaxV>

    XMVECTOR VF = XMVectorAbs(V);
    XMVECTOR F_GT_One = _mm_cmpgt_ps(VF,g_XMOne);
    XMVECTOR ReciprocalF = XMVectorReciprocal(VF);
    VF = XMVectorSelect(VF, ReciprocalF, F_GT_One);
    XMVECTOR Zero = XMVectorZero();
    XMVECTOR HalfPi = _mm_load_ps1(&ATanConstants3.f[0]);
    XMVECTOR Angle1 = XMVectorSelect(Zero, HalfPi, F_GT_One);
    // Pi/3
    XMVECTOR vConstants = _mm_load_ps1(&ATanConstants3.f[1]);
    // Pi/6
    XMVECTOR Angle2 = _mm_load_ps1(&ATanConstants3.f[2]);
    Angle2 = XMVectorSelect(Angle2, vConstants, F_GT_One);

    // sqrt(3) - 1
    XMVECTOR FA = _mm_load_ps1(&ATanConstants2.f[1]);
    FA = _mm_mul_ps(FA,VF);
    FA = _mm_add_ps(FA,VF);
    FA = _mm_add_ps(FA,g_XMNegativeOne);
    // sqrt(3)
    vConstants = _mm_load_ps1(&ATanConstants2.f[0]);
    vConstants = _mm_add_ps(vConstants,VF);
    FA = _mm_div_ps(FA,vConstants);

    // 2 - sqrt(3)
    vConstants = _mm_load_ps1(&ATanConstants2.f[2]);
    // > 2 - sqrt(3)?
    vConstants = _mm_cmpgt_ps(VF,vConstants);
    VF = XMVectorSelect(VF, FA, vConstants);
    Angle1 = XMVectorSelect(Angle1, Angle2, vConstants);

    XMVECTOR AbsF = XMVectorAbs(VF);

    XMVECTOR G = _mm_mul_ps(VF,VF);
    XMVECTOR D = _mm_load_ps1(&ATanConstants1.f[3]);
    D = _mm_add_ps(D,G);
    D = _mm_mul_ps(D,G);
    vConstants = _mm_load_ps1(&ATanConstants1.f[2]);
    D = _mm_add_ps(D,vConstants);
    D = _mm_mul_ps(D,G);
    vConstants = _mm_load_ps1(&ATanConstants1.f[1]);
    D = _mm_add_ps(D,vConstants);
    D = _mm_mul_ps(D,G);
    vConstants = _mm_load_ps1(&ATanConstants1.f[0]);
    D = _mm_add_ps(D,vConstants);

    XMVECTOR N = _mm_load_ps1(&ATanConstants0.f[3]);
    N = _mm_mul_ps(N,G);
    vConstants = _mm_load_ps1(&ATanConstants0.f[2]);
    N = _mm_add_ps(N,vConstants);
    N = _mm_mul_ps(N,G);
    vConstants = _mm_load_ps1(&ATanConstants0.f[1]);
    N = _mm_add_ps(N,vConstants);
    N = _mm_mul_ps(N,G);
    vConstants = _mm_load_ps1(&ATanConstants0.f[0]);
    N = _mm_add_ps(N,vConstants);
    N = _mm_mul_ps(N,G);
    XMVECTOR Result = _mm_div_ps(N,D);

    Result = _mm_mul_ps(Result,VF);
    Result = _mm_add_ps(Result,VF);
    // Epsilon
    vConstants = _mm_load_ps1(&ATanConstants2.f[3]);
    vConstants = _mm_cmpge_ps(vConstants,AbsF);
    Result = XMVectorSelect(Result,VF,vConstants);

    XMVECTOR NegativeResult = _mm_mul_ps(Result,g_XMNegativeOne);
    Result = XMVectorSelect(Result,NegativeResult,F_GT_One);
    Result = _mm_add_ps(Result,Angle1);

    Zero = _mm_cmpge_ps(Zero,V);
    NegativeResult = _mm_mul_ps(Result,g_XMNegativeOne);
    Result = XMVectorSelect(Result,NegativeResult,Zero);

    XMVECTOR MaxV = _mm_load_ps1(&ATanConstants3.f[3]);
    XMVECTOR MinV = _mm_mul_ps(MaxV,g_XMNegativeOne);
    // Negate HalfPi so it can stand in for -Pi/2 below
    HalfPi = _mm_mul_ps(HalfPi,g_XMNegativeOne);
    MaxV = _mm_cmple_ps(MaxV,V);
    MinV = _mm_cmpge_ps(MinV,V);
    Result = XMVectorSelect(Result,g_XMHalfPi,MaxV);
    // HalfPi now holds -Pi/2
    Result = XMVectorSelect(Result,HalfPi,MinV);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
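//------------------------------------------------------------------------------
// Editor's note: a sketch of the reduction above, not part of the original
// library; the spot-check helper is hypothetical. Every input is folded
// into [0, 2 - sqrt(3)] using
//     atan(x) = Pi/2 - atan(1/x)                                  for x > 1
//     atan(x) = Pi/6 + atan((sqrt(3) * x - 1) / (x + sqrt(3)))    for x > 2 - sqrt(3)
// before the rational approximation runs; Angle1 carries the folded-out
// Pi/2, Pi/3, or Pi/6 back into the result.
XMFINLINE XMVECTOR XMVectorATanSpotCheck()
{
    // atan of { 0, 1, -1, sqrt(3) } should come back ~{ 0, Pi/4, -Pi/4, Pi/3 }
    return XMVectorATan(XMVectorSet(0.0f, 1.0f, -1.0f, 1.732050808f));
}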
//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorATan2
(
    FXMVECTOR Y,
    FXMVECTOR X
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Return the inverse tangent of Y / X in the range of -Pi to Pi with the following exceptions:

    //     Y == 0 and X is Negative         -> Pi with the sign of Y
    //     Y == 0 and X is Positive         -> 0 with the sign of Y
    //     Y != 0 and X == 0                -> Pi / 2 with the sign of Y
    //     Y != 0 and X is Negative         -> atan(Y / X) + (Pi with the sign of Y)
    //     X == -Infinity and Finite Y      -> Pi with the sign of Y
    //     X == +Infinity and Finite Y      -> 0 with the sign of Y
    //     Y == Infinity and X is Finite    -> Pi / 2 with the sign of Y
    //     Y == Infinity and X == -Infinity -> 3Pi / 4 with the sign of Y
    //     Y == Infinity and X == +Infinity -> Pi / 4 with the sign of Y

    XMVECTOR Reciprocal;
    XMVECTOR V;
    XMVECTOR YSign;
    XMVECTOR Pi, PiOverTwo, PiOverFour, ThreePiOverFour;
    XMVECTOR YEqualsZero, XEqualsZero, XIsPositive, YEqualsInfinity, XEqualsInfinity;
    XMVECTOR ATanResultValid;
    XMVECTOR R0, R1, R2, R3, R4, R5;
    XMVECTOR Zero;
    XMVECTOR Result;
    static CONST XMVECTOR ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};

    Zero = XMVectorZero();
    ATanResultValid = XMVectorTrueInt();

    Pi = XMVectorSplatX(ATan2Constants);
    PiOverTwo = XMVectorSplatY(ATan2Constants);
    PiOverFour = XMVectorSplatZ(ATan2Constants);
    ThreePiOverFour = XMVectorSplatW(ATan2Constants);

    YEqualsZero = XMVectorEqual(Y, Zero);
    XEqualsZero = XMVectorEqual(X, Zero);
    XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v);
    XIsPositive = XMVectorEqualInt(XIsPositive, Zero);
    YEqualsInfinity = XMVectorIsInfinite(Y);
    XEqualsInfinity = XMVectorIsInfinite(X);

    YSign = XMVectorAndInt(Y, g_XMNegativeZero.v);
    Pi = XMVectorOrInt(Pi, YSign);
    PiOverTwo = XMVectorOrInt(PiOverTwo, YSign);
    PiOverFour = XMVectorOrInt(PiOverFour, YSign);
    ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign);

    R1 = XMVectorSelect(Pi, YSign, XIsPositive);
    R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero);
    R3 = XMVectorSelect(R2, R1, YEqualsZero);
    R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
    R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity);
    Result = XMVectorSelect(R3, R5, YEqualsInfinity);
    ATanResultValid = XMVectorEqualInt(Result, ATanResultValid);

    Reciprocal = XMVectorReciprocal(X);
    V = XMVectorMultiply(Y, Reciprocal);
    R0 = XMVectorATan(V);

    R1 = XMVectorSelect( Pi, Zero, XIsPositive );
    R2 = XMVectorAdd(R0, R1);

    Result = XMVectorSelect(Result, R2, ATanResultValid);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};

    // Mask where Y is +/-infinity
    XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y);
    // Get the sign of Y (Y & 0x80000000)
    XMVECTOR YSign = _mm_and_ps(Y, g_XMNegativeZero);
    // Get the sign bits of X
    XMVECTOR XIsPositive = _mm_and_ps(X,g_XMNegativeZero);
    // Change them to masks
    XIsPositive = XMVectorEqualInt(XIsPositive,g_XMZero);
    // Get Pi
    XMVECTOR Pi = _mm_load_ps1(&ATan2Constants.f[0]);
    // Copy the sign of Y
    Pi = _mm_or_ps(Pi,YSign);
    XMVECTOR R1 = XMVectorSelect(Pi,YSign,XIsPositive);
    // Mask for X==0
    XMVECTOR vConstants = _mm_cmpeq_ps(X,g_XMZero);
    // Get Pi/2 with the sign of Y
    XMVECTOR PiOverTwo = _mm_load_ps1(&ATan2Constants.f[1]);
    PiOverTwo = _mm_or_ps(PiOverTwo,YSign);
    XMVECTOR R2 = XMVectorSelect(g_XMNegOneMask,PiOverTwo,vConstants);
    // Mask for Y==0
    vConstants = _mm_cmpeq_ps(Y,g_XMZero);
    R2 = XMVectorSelect(R2,R1,vConstants);
    // Get Pi/4 with the sign of Y
    XMVECTOR PiOverFour = _mm_load_ps1(&ATan2Constants.f[2]);
    PiOverFour = _mm_or_ps(PiOverFour,YSign);
    // Get (Pi*3)/4 with the sign of Y
    XMVECTOR ThreePiOverFour = _mm_load_ps1(&ATan2Constants.f[3]);
    ThreePiOverFour = _mm_or_ps(ThreePiOverFour,YSign);
    vConstants = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
    XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X);
    vConstants = XMVectorSelect(PiOverTwo,vConstants,XEqualsInfinity);

    XMVECTOR vResult = XMVectorSelect(R2,vConstants,YEqualsInfinity);
    vConstants = XMVectorSelect(R1,vResult,YEqualsInfinity);
    // At this point, any entry that still holds the 0xFFFFFFFF placeholder
    // will get the result from XMVectorATan(); all other entries keep their
    // precalculated special-case value
    vResult = XMVectorSelect(vResult,vConstants,XEqualsInfinity);
    // Any entries not 0xFFFFFFFF are considered precalculated
    XMVECTOR ATanResultValid = XMVectorEqualInt(vResult,g_XMNegOneMask);
    // Do the ATan2 function
    vConstants = _mm_div_ps(Y,X);
    vConstants = XMVectorATan(vConstants);
    // Discard entries that have been declared void

    XMVECTOR R3 = XMVectorSelect( Pi, g_XMZero, XIsPositive );
    vConstants = _mm_add_ps( vConstants, R3 );

    vResult = XMVectorSelect(vResult,vConstants,ATanResultValid);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
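//------------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the original library; the
// helper name is hypothetical. Unlike XMVectorATan, XMVectorATan2 keeps the
// signs of both arguments, so each lane lands in the correct quadrant of
// [-Pi, Pi].
XMFINLINE XMVECTOR XMVectorATan2SpotCheck()
{
    XMVECTOR Y = XMVectorSet(1.0f, 1.0f, -1.0f, -1.0f);
    XMVECTOR X = XMVectorSet(1.0f, -1.0f, -1.0f, 1.0f);
    // One direction per quadrant: expect ~{ Pi/4, 3Pi/4, -3Pi/4, -Pi/4 }
    return XMVectorATan2(Y, X);
}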
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorSinEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V2, V3, V5, V7;
    XMVECTOR S1, S2, S3;
    XMVECTOR Result;

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
    V2 = XMVectorMultiply(V, V);
    V3 = XMVectorMultiply(V2, V);
    V5 = XMVectorMultiply(V3, V2);
    V7 = XMVectorMultiply(V5, V2);

    S1 = XMVectorSplatY(g_XMSinEstCoefficients.v);
    S2 = XMVectorSplatZ(g_XMSinEstCoefficients.v);
    S3 = XMVectorSplatW(g_XMSinEstCoefficients.v);

    Result = XMVectorMultiplyAdd(S1, V3, V);
    Result = XMVectorMultiplyAdd(S2, V5, Result);
    Result = XMVectorMultiplyAdd(S3, V7, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
    XMVECTOR V2 = _mm_mul_ps(V,V);
    XMVECTOR V3 = _mm_mul_ps(V2,V);
    XMVECTOR vResult = _mm_load_ps1(&g_XMSinEstCoefficients.f[1]);
    vResult = _mm_mul_ps(vResult,V3);
    vResult = _mm_add_ps(vResult,V);
    XMVECTOR vConstants = _mm_load_ps1(&g_XMSinEstCoefficients.f[2]);
    // V^5
    V3 = _mm_mul_ps(V3,V2);
    vConstants = _mm_mul_ps(vConstants,V3);
    vResult = _mm_add_ps(vResult,vConstants);
    vConstants = _mm_load_ps1(&g_XMSinEstCoefficients.f[3]);
    // V^7
    V3 = _mm_mul_ps(V3,V2);
    vConstants = _mm_mul_ps(vConstants,V3);
    vResult = _mm_add_ps(vResult,vConstants);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
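//------------------------------------------------------------------------------
// Editor's note: a usage caution, not part of the original library; the
// helper name is hypothetical. The *Est variants trade accuracy for speed:
// XMVectorSinEst stops the series at the V^7 term and does not reduce its
// argument the way XMVectorSinCos does with XMVectorModAngles, so callers
// must keep inputs within roughly [-Pi, Pi) themselves.
XMFINLINE VOID XMVectorSinEstUsageSketch(XMVECTOR* pFast, XMVECTOR* pAccurate)
{
    XMVECTOR Angles = XMVectorSet(0.0f, 0.5f, 1.0f, 3.0f);
    *pFast = XMVectorSinEst(Angles);  // short series, caller-reduced angles
    *pAccurate = XMVectorSin(Angles); // full-precision path
}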
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorCosEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V2, V4, V6;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR Result;

    V2 = XMVectorMultiply(V, V);
    V4 = XMVectorMultiply(V2, V2);
    V6 = XMVectorMultiply(V4, V2);

    C0 = XMVectorSplatX(g_XMCosEstCoefficients.v);
    C1 = XMVectorSplatY(g_XMCosEstCoefficients.v);
    C2 = XMVectorSplatZ(g_XMCosEstCoefficients.v);
    C3 = XMVectorSplatW(g_XMCosEstCoefficients.v);

    Result = XMVectorMultiplyAdd(C1, V2, C0);
    Result = XMVectorMultiplyAdd(C2, V4, Result);
    Result = XMVectorMultiplyAdd(C3, V6, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Get V^2
    XMVECTOR V2 = _mm_mul_ps(V,V);
    XMVECTOR vResult = _mm_load_ps1(&g_XMCosEstCoefficients.f[1]);
    vResult = _mm_mul_ps(vResult,V2);
    XMVECTOR vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[0]);
    vResult = _mm_add_ps(vResult,vConstants);
    vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[2]);
    // Get V^4
    XMVECTOR V4 = _mm_mul_ps(V2, V2);
    vConstants = _mm_mul_ps(vConstants,V4);
    vResult = _mm_add_ps(vResult,vConstants);
    vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[3]);
    // It's really V^6
    V4 = _mm_mul_ps(V4,V2);
    vConstants = _mm_mul_ps(vConstants,V4);
    vResult = _mm_add_ps(vResult,vConstants);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------

XMFINLINE VOID XMVectorSinCosEst
(
    XMVECTOR* pSin,
    XMVECTOR* pCos,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V2, V3, V4, V5, V6, V7;
    XMVECTOR S1, S2, S3;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR Sin, Cos;

    XMASSERT(pSin);
    XMASSERT(pCos);

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
    V2 = XMVectorMultiply(V, V);
    V3 = XMVectorMultiply(V2, V);
    V4 = XMVectorMultiply(V2, V2);
    V5 = XMVectorMultiply(V3, V2);
    V6 = XMVectorMultiply(V3, V3);
    V7 = XMVectorMultiply(V4, V3);

    S1 = XMVectorSplatY(g_XMSinEstCoefficients.v);
    S2 = XMVectorSplatZ(g_XMSinEstCoefficients.v);
    S3 = XMVectorSplatW(g_XMSinEstCoefficients.v);

    C0 = XMVectorSplatX(g_XMCosEstCoefficients.v);
    C1 = XMVectorSplatY(g_XMCosEstCoefficients.v);
    C2 = XMVectorSplatZ(g_XMCosEstCoefficients.v);
    C3 = XMVectorSplatW(g_XMCosEstCoefficients.v);

    Sin = XMVectorMultiplyAdd(S1, V3, V);
    Sin = XMVectorMultiplyAdd(S2, V5, Sin);
    Sin = XMVectorMultiplyAdd(S3, V7, Sin);

    Cos = XMVectorMultiplyAdd(C1, V2, C0);
    Cos = XMVectorMultiplyAdd(C2, V4, Cos);
    Cos = XMVectorMultiplyAdd(C3, V6, Cos);

    *pSin = Sin;
    *pCos = Cos;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSin);
    XMASSERT(pCos);
    XMVECTOR V2, V3, V4, V5, V6, V7;
    XMVECTOR S1, S2, S3;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR Sin, Cos;

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
    V2 = XMVectorMultiply(V, V);
    V3 = XMVectorMultiply(V2, V);
    V4 = XMVectorMultiply(V2, V2);
    V5 = XMVectorMultiply(V3, V2);
    V6 = XMVectorMultiply(V3, V3);
    V7 = XMVectorMultiply(V4, V3);

    S1 = _mm_load_ps1(&g_XMSinEstCoefficients.f[1]);
    S2 = _mm_load_ps1(&g_XMSinEstCoefficients.f[2]);
    S3 = _mm_load_ps1(&g_XMSinEstCoefficients.f[3]);

    C0 = _mm_load_ps1(&g_XMCosEstCoefficients.f[0]);
    C1 = _mm_load_ps1(&g_XMCosEstCoefficients.f[1]);
    C2 = _mm_load_ps1(&g_XMCosEstCoefficients.f[2]);
    C3 = _mm_load_ps1(&g_XMCosEstCoefficients.f[3]);

    Sin = XMVectorMultiplyAdd(S1, V3, V);
    Sin = XMVectorMultiplyAdd(S2, V5, Sin);
    Sin = XMVectorMultiplyAdd(S3, V7, Sin);

    Cos = XMVectorMultiplyAdd(C1, V2, C0);
    Cos = XMVectorMultiplyAdd(C2, V4, Cos);
    Cos = XMVectorMultiplyAdd(C3, V6, Cos);

    *pSin = Sin;
    *pCos = Cos;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorTanEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2, V1T0, V1T1, V2T2;
    XMVECTOR T0, T1, T2;
    XMVECTOR N, D;
    XMVECTOR OneOverPi;
    XMVECTOR Result;

    OneOverPi = XMVectorSplatW(g_XMTanEstCoefficients.v);

    V1 = XMVectorMultiply(V, OneOverPi);
    V1 = XMVectorRound(V1);

    V1 = XMVectorNegativeMultiplySubtract(g_XMPi.v, V1, V);

    T0 = XMVectorSplatX(g_XMTanEstCoefficients.v);
    T1 = XMVectorSplatY(g_XMTanEstCoefficients.v);
    T2 = XMVectorSplatZ(g_XMTanEstCoefficients.v);

    V2T2 = XMVectorNegativeMultiplySubtract(V1, V1, T2);
    V2 = XMVectorMultiply(V1, V1);
    V1T0 = XMVectorMultiply(V1, T0);
    V1T1 = XMVectorMultiply(V1, T1);

    D = XMVectorReciprocalEst(V2T2);
    N = XMVectorMultiplyAdd(V2, V1T1, V1T0);

    Result = XMVectorMultiply(N, D);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1, V2, V1T0, V1T1, V2T2;
    XMVECTOR T0, T1, T2;
    XMVECTOR N, D;
    XMVECTOR OneOverPi;
    XMVECTOR Result;

    OneOverPi = XMVectorSplatW(g_XMTanEstCoefficients);

    V1 = XMVectorMultiply(V, OneOverPi);
    V1 = XMVectorRound(V1);

    V1 = XMVectorNegativeMultiplySubtract(g_XMPi, V1, V);

    T0 = XMVectorSplatX(g_XMTanEstCoefficients);
    T1 = XMVectorSplatY(g_XMTanEstCoefficients);
    T2 = XMVectorSplatZ(g_XMTanEstCoefficients);

    V2T2 = XMVectorNegativeMultiplySubtract(V1, V1, T2);
    V2 = XMVectorMultiply(V1, V1);
    V1T0 = XMVectorMultiply(V1, T0);
    V1T1 = XMVectorMultiply(V1, T1);

    D = XMVectorReciprocalEst(V2T2);
    N = XMVectorMultiplyAdd(V2, V1T1, V1T0);

    Result = XMVectorMultiply(N, D);

    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorSinHEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = XMVectorMultiplyAdd(V, Scale.v, g_XMNegativeOne.v);
    V2 = XMVectorNegativeMultiplySubtract(V, Scale.v, g_XMNegativeOne.v);

    E1 = XMVectorExpEst(V1);
    E2 = XMVectorExpEst(V2);

    Result = XMVectorSubtract(E1, E2);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = _mm_mul_ps(V,Scale);
    V1 = _mm_add_ps(V1,g_XMNegativeOne);
    V2 = _mm_mul_ps(V,Scale);
    V2 = _mm_sub_ps(g_XMNegativeOne,V2);
    E1 = XMVectorExpEst(V1);
    E2 = XMVectorExpEst(V2);
    Result = _mm_sub_ps(E1, E2);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorCosHEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTOR Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = XMVectorMultiplyAdd(V, Scale, g_XMNegativeOne.v);
    V2 = XMVectorNegativeMultiplySubtract(V, Scale, g_XMNegativeOne.v);

    E1 = XMVectorExpEst(V1);
    E2 = XMVectorExpEst(V2);

    Result = XMVectorAdd(E1, E2);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = _mm_mul_ps(V,Scale);
    V1 = _mm_add_ps(V1,g_XMNegativeOne);
    V2 = _mm_mul_ps(V, Scale);
    V2 = _mm_sub_ps(g_XMNegativeOne,V2);
    E1 = XMVectorExpEst(V1);
    E2 = XMVectorExpEst(V2);
    Result = _mm_add_ps(E1, E2);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorTanHEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR E;
    XMVECTOR Result;
    static CONST XMVECTOR Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)

    E = XMVectorMultiply(V, Scale);
    E = XMVectorExpEst(E);
    E = XMVectorMultiplyAdd(E, g_XMOneHalf.v, g_XMOneHalf.v);
    E = XMVectorReciprocalEst(E);

    Result = XMVectorSubtract(g_XMOne.v, E);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)

    XMVECTOR E = _mm_mul_ps(V, Scale);
    E = XMVectorExpEst(E);
    E = _mm_mul_ps(E,g_XMOneHalf);
    E = _mm_add_ps(E,g_XMOneHalf);
    E = XMVectorReciprocalEst(E);
    E = _mm_sub_ps(g_XMOne, E);
    return E;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
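//------------------------------------------------------------------------------
// Editor's note: a derivation sketch, not part of the original library.
// XMVectorTanH and XMVectorTanHEst both evaluate
//     tanh(x) = 1 - 2 / (e^(2x) + 1)
// E starts as e^(2x) (the 2/ln2 scale folds the change of base into the
// exponential), the half-and-half multiply-add turns it into
// (e^(2x) + 1) / 2, the reciprocal yields 2 / (e^(2x) + 1), and the final
// subtraction from one completes the identity; the Est form merely swaps in
// the estimated exponential and reciprocal.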
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorASinEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR AbsV, V2, VD, VC0, V2C3;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR D, Rsq, SqrtD;
    XMVECTOR OnePlusEps;
    XMVECTOR Result;

    AbsV = XMVectorAbs(V);

    OnePlusEps = XMVectorSplatX(g_XMASinEstConstants.v);

    C0 = XMVectorSplatX(g_XMASinEstCoefficients.v);
    C1 = XMVectorSplatY(g_XMASinEstCoefficients.v);
    C2 = XMVectorSplatZ(g_XMASinEstCoefficients.v);
    C3 = XMVectorSplatW(g_XMASinEstCoefficients.v);

    D = XMVectorSubtract(OnePlusEps, AbsV);

    Rsq = XMVectorReciprocalSqrtEst(D);
    SqrtD = XMVectorMultiply(D, Rsq);

    V2 = XMVectorMultiply(V, AbsV);
    V2C3 = XMVectorMultiply(V2, C3);
    VD = XMVectorMultiply(D, AbsV);
    VC0 = XMVectorMultiply(V, C0);

    Result = XMVectorMultiply(V, C1);
    Result = XMVectorMultiplyAdd(V2, C2, Result);
    Result = XMVectorMultiplyAdd(V2C3, VD, Result);
    Result = XMVectorMultiplyAdd(VC0, SqrtD, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Get abs(V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);

    XMVECTOR D = _mm_load_ps1(&g_XMASinEstConstants.f[0]);
    D = _mm_sub_ps(D,vAbsV);
    // Since this is an estimate, rsqrt is okay
    XMVECTOR vConstants = _mm_rsqrt_ps(D);
    XMVECTOR SqrtD = _mm_mul_ps(D,vConstants);
    // V2 = V^2 retaining sign
    XMVECTOR V2 = _mm_mul_ps(V,vAbsV);
    D = _mm_mul_ps(D,vAbsV);

    XMVECTOR vResult = _mm_load_ps1(&g_XMASinEstCoefficients.f[1]);
    vResult = _mm_mul_ps(vResult,V);
    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[2]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[3]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vConstants = _mm_mul_ps(vConstants,D);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[0]);
    vConstants = _mm_mul_ps(vConstants,V);
    vConstants = _mm_mul_ps(vConstants,SqrtD);
    vResult = _mm_add_ps(vResult,vConstants);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorACosEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR AbsV, V2, VD, VC0, V2C3;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR D, Rsq, SqrtD;
    XMVECTOR OnePlusEps, HalfPi;
    XMVECTOR Result;

    // acos(V) = PI / 2 - asin(V)

    AbsV = XMVectorAbs(V);

    OnePlusEps = XMVectorSplatX(g_XMASinEstConstants.v);
    HalfPi = XMVectorSplatY(g_XMASinEstConstants.v);

    C0 = XMVectorSplatX(g_XMASinEstCoefficients.v);
    C1 = XMVectorSplatY(g_XMASinEstCoefficients.v);
    C2 = XMVectorSplatZ(g_XMASinEstCoefficients.v);
    C3 = XMVectorSplatW(g_XMASinEstCoefficients.v);

    D = XMVectorSubtract(OnePlusEps, AbsV);

    Rsq = XMVectorReciprocalSqrtEst(D);
    SqrtD = XMVectorMultiply(D, Rsq);

    V2 = XMVectorMultiply(V, AbsV);
    V2C3 = XMVectorMultiply(V2, C3);
    VD = XMVectorMultiply(D, AbsV);
    VC0 = XMVectorMultiply(V, C0);

    Result = XMVectorMultiply(V, C1);
    Result = XMVectorMultiplyAdd(V2, C2, Result);
    Result = XMVectorMultiplyAdd(V2C3, VD, Result);
    Result = XMVectorMultiplyAdd(VC0, SqrtD, Result);
    Result = XMVectorSubtract(HalfPi, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // acos(V) = PI / 2 - asin(V)
    // Get abs(V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);
    // Calc D
    XMVECTOR D = _mm_load_ps1(&g_XMASinEstConstants.f[0]);
    D = _mm_sub_ps(D,vAbsV);
    // SqrtD = sqrt(OnePlusEps - abs(V)), estimated
    XMVECTOR vConstants = _mm_rsqrt_ps(D);
    XMVECTOR SqrtD = _mm_mul_ps(D,vConstants);
    // V2 = V^2 while retaining sign
    XMVECTOR V2 = _mm_mul_ps(V, vAbsV);
    // Drop vAbsV here. D = (Const-abs(V))*abs(V)
    D = _mm_mul_ps(D, vAbsV);

    XMVECTOR vResult = _mm_load_ps1(&g_XMASinEstCoefficients.f[1]);
    vResult = _mm_mul_ps(vResult,V);
    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[2]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[3]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vConstants = _mm_mul_ps(vConstants,D);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[0]);
    vConstants = _mm_mul_ps(vConstants,V);
    vConstants = _mm_mul_ps(vConstants,SqrtD);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstConstants.f[1]);
    vResult = _mm_sub_ps(vConstants,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorATanEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR AbsV, V2S2, N, D;
    XMVECTOR S0, S1, S2;
    XMVECTOR HalfPi;
    XMVECTOR Result;

    S0 = XMVectorSplatX(g_XMATanEstCoefficients.v);
    S1 = XMVectorSplatY(g_XMATanEstCoefficients.v);
    S2 = XMVectorSplatZ(g_XMATanEstCoefficients.v);
    HalfPi = XMVectorSplatW(g_XMATanEstCoefficients.v);

    AbsV = XMVectorAbs(V);

    V2S2 = XMVectorMultiplyAdd(V, V, S2);
    N = XMVectorMultiplyAdd(AbsV, HalfPi, S0);
    D = XMVectorMultiplyAdd(AbsV, S1, V2S2);
    N = XMVectorMultiply(N, V);
    D = XMVectorReciprocalEst(D);

    Result = XMVectorMultiply(N, D);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Get abs(V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);

    XMVECTOR vResult = _mm_load_ps1(&g_XMATanEstCoefficients.f[3]);
    vResult = _mm_mul_ps(vResult,vAbsV);
    XMVECTOR vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[0]);
    vResult = _mm_add_ps(vResult,vConstants);
    vResult = _mm_mul_ps(vResult,V);

    XMVECTOR D = _mm_mul_ps(V,V);
    vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[2]);
    D = _mm_add_ps(D,vConstants);
    vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[1]);
    vConstants = _mm_mul_ps(vConstants,vAbsV);
    D = _mm_add_ps(D,vConstants);
    vResult = _mm_div_ps(vResult,D);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorATan2Est
(
    FXMVECTOR Y,
    FXMVECTOR X
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Reciprocal;
    XMVECTOR V;
    XMVECTOR YSign;
    XMVECTOR Pi, PiOverTwo, PiOverFour, ThreePiOverFour;
    XMVECTOR YEqualsZero, XEqualsZero, XIsPositive, YEqualsInfinity, XEqualsInfinity;
    XMVECTOR ATanResultValid;
    XMVECTOR R0, R1, R2, R3, R4, R5;
    XMVECTOR Zero;
    XMVECTOR Result;
    static CONST XMVECTOR ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};

    Zero = XMVectorZero();
    ATanResultValid = XMVectorTrueInt();

    Pi = XMVectorSplatX(ATan2Constants);
    PiOverTwo = XMVectorSplatY(ATan2Constants);
    PiOverFour = XMVectorSplatZ(ATan2Constants);
    ThreePiOverFour = XMVectorSplatW(ATan2Constants);

    YEqualsZero = XMVectorEqual(Y, Zero);
    XEqualsZero = XMVectorEqual(X, Zero);
    XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v);
    XIsPositive = XMVectorEqualInt(XIsPositive, Zero);
    YEqualsInfinity = XMVectorIsInfinite(Y);
    XEqualsInfinity = XMVectorIsInfinite(X);

    YSign = XMVectorAndInt(Y, g_XMNegativeZero.v);
    Pi = XMVectorOrInt(Pi, YSign);
    PiOverTwo = XMVectorOrInt(PiOverTwo, YSign);
    PiOverFour = XMVectorOrInt(PiOverFour, YSign);
    ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign);

    R1 = XMVectorSelect(Pi, YSign, XIsPositive);
    R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero);
    R3 = XMVectorSelect(R2, R1, YEqualsZero);
    R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
    R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity);
    Result = XMVectorSelect(R3, R5, YEqualsInfinity);
    ATanResultValid = XMVectorEqualInt(Result, ATanResultValid);

    Reciprocal = XMVectorReciprocalEst(X);
    V = XMVectorMultiply(Y, Reciprocal);
    R0 = XMVectorATanEst(V);

    R1 = XMVectorSelect( Pi, Zero, XIsPositive );
    R2 = XMVectorAdd(R0, R1);

    Result = XMVectorSelect(Result, R2, ATanResultValid);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};

    // Mask where Y is +/-infinity
    XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y);
    // Get the sign of Y (Y & 0x80000000)
    XMVECTOR YSign = _mm_and_ps(Y, g_XMNegativeZero);
    // Get the sign bits of X
    XMVECTOR XIsPositive = _mm_and_ps(X,g_XMNegativeZero);
    // Change them to masks
    XIsPositive = XMVectorEqualInt(XIsPositive,g_XMZero);
    // Get Pi
    XMVECTOR Pi = _mm_load_ps1(&ATan2Constants.f[0]);
    // Copy the sign of Y
    Pi = _mm_or_ps(Pi,YSign);
    XMVECTOR R1 = XMVectorSelect(Pi,YSign,XIsPositive);
    // Mask for X==0
    XMVECTOR vConstants = _mm_cmpeq_ps(X,g_XMZero);
    // Get Pi/2 with the sign of Y
    XMVECTOR PiOverTwo = _mm_load_ps1(&ATan2Constants.f[1]);
    PiOverTwo = _mm_or_ps(PiOverTwo,YSign);
    XMVECTOR R2 = XMVectorSelect(g_XMNegOneMask,PiOverTwo,vConstants);
    // Mask for Y==0
    vConstants = _mm_cmpeq_ps(Y,g_XMZero);
    R2 = XMVectorSelect(R2,R1,vConstants);
    // Get Pi/4 with the sign of Y
    XMVECTOR PiOverFour = _mm_load_ps1(&ATan2Constants.f[2]);
    PiOverFour = _mm_or_ps(PiOverFour,YSign);
    // Get (Pi*3)/4 with the sign of Y
    XMVECTOR ThreePiOverFour = _mm_load_ps1(&ATan2Constants.f[3]);
    ThreePiOverFour = _mm_or_ps(ThreePiOverFour,YSign);
    vConstants = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
    XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X);
    vConstants = XMVectorSelect(PiOverTwo,vConstants,XEqualsInfinity);

    XMVECTOR vResult = XMVectorSelect(R2,vConstants,YEqualsInfinity);
    vConstants = XMVectorSelect(R1,vResult,YEqualsInfinity);
    // At this point, any entry that still holds the 0xFFFFFFFF placeholder
    // will get the result from XMVectorATanEst(); all other entries keep
    // their precalculated special-case value
    vResult = XMVectorSelect(vResult,vConstants,XEqualsInfinity);
    // Any entries not 0xFFFFFFFF are considered precalculated
    XMVECTOR ATanResultValid = XMVectorEqualInt(vResult,g_XMNegOneMask);
    // Do the ATan2 function
    XMVECTOR Reciprocal = _mm_rcp_ps(X);
    vConstants = _mm_mul_ps(Y, Reciprocal);
    vConstants = XMVectorATanEst(vConstants);
    // Discard entries that have been declared void

    XMVECTOR R3 = XMVectorSelect( Pi, g_XMZero, XIsPositive );
    vConstants = _mm_add_ps( vConstants, R3 );

    vResult = XMVectorSelect(vResult,vConstants,ATanResultValid);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
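//------------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the original library; the
// helper name is hypothetical. A typical use of the estimated form is
// turning a 2D direction into a heading angle when a few bits of error are
// acceptable.
XMFINLINE XMVECTOR XMVectorHeadingFromDirectionSketch(FXMVECTOR Direction)
{
    // Heading measured from the +X axis: atan2 of the Y and X components
    return XMVectorATan2Est(XMVectorSplatY(Direction), XMVectorSplatX(Direction));
}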
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorLerp
(
    FXMVECTOR V0,
    FXMVECTOR V1,
    FLOAT t
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Scale;
    XMVECTOR Length;
    XMVECTOR Result;

    // V0 + t * (V1 - V0)
    Scale = XMVectorReplicate(t);
    Length = XMVectorSubtract(V1, V0);
    Result = XMVectorMultiplyAdd(Length, Scale, V0);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR L, S;
    XMVECTOR Result;

    L = _mm_sub_ps( V1, V0 );

    S = _mm_set_ps1( t );

    Result = _mm_mul_ps( L, S );

    return _mm_add_ps( Result, V0 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorLerpV
(
    FXMVECTOR V0,
    FXMVECTOR V1,
    FXMVECTOR T
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Length;
    XMVECTOR Result;

    // V0 + T * (V1 - V0)
    Length = XMVectorSubtract(V1, V0);
    Result = XMVectorMultiplyAdd(Length, T, V0);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Length;
    XMVECTOR Result;

    Length = _mm_sub_ps( V1, V0 );

    Result = _mm_mul_ps( Length, T );

    return _mm_add_ps( Result, V0 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
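//------------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the original library; the
// helper name is hypothetical. XMVectorLerp takes one scalar t for all four
// lanes, while XMVectorLerpV takes a vector T so each lane can interpolate
// with its own parameter.
XMFINLINE XMVECTOR XMVectorLerpUsageSketch()
{
    XMVECTOR V0 = XMVectorZero();
    XMVECTOR V1 = XMVectorSet(10.0f, 20.0f, 30.0f, 40.0f);
    // Halfway between V0 and V1: { 5, 10, 15, 20 }
    return XMVectorLerp(V0, V1, 0.5f);
}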

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorHermite
(
    FXMVECTOR Position0,
    FXMVECTOR Tangent0,
    FXMVECTOR Position1,
    CXMVECTOR Tangent1,
    FLOAT t
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR P0;
    XMVECTOR T0;
    XMVECTOR P1;
    XMVECTOR T1;
    XMVECTOR Result;
    FLOAT t2;
    FLOAT t3;

    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
    //          (t^3 - 2 * t^2 + t) * Tangent0 +
    //          (-2 * t^3 + 3 * t^2) * Position1 +
    //          (t^3 - t^2) * Tangent1
    t2 = t * t;
    t3 = t * t2;

    P0 = XMVectorReplicate(2.0f * t3 - 3.0f * t2 + 1.0f);
    T0 = XMVectorReplicate(t3 - 2.0f * t2 + t);
    P1 = XMVectorReplicate(-2.0f * t3 + 3.0f * t2);
    T1 = XMVectorReplicate(t3 - t2);

    Result = XMVectorMultiply(P0, Position0);
    Result = XMVectorMultiplyAdd(T0, Tangent0, Result);
    Result = XMVectorMultiplyAdd(P1, Position1, Result);
    Result = XMVectorMultiplyAdd(T1, Tangent1, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    FLOAT t2 = t * t;
    FLOAT t3 = t * t2;

    XMVECTOR P0 = _mm_set_ps1(2.0f * t3 - 3.0f * t2 + 1.0f);
    XMVECTOR T0 = _mm_set_ps1(t3 - 2.0f * t2 + t);
    XMVECTOR P1 = _mm_set_ps1(-2.0f * t3 + 3.0f * t2);
    XMVECTOR T1 = _mm_set_ps1(t3 - t2);

    XMVECTOR vResult = _mm_mul_ps(P0, Position0);
    XMVECTOR vTemp = _mm_mul_ps(T0, Tangent0);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_mul_ps(P1, Position1);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_mul_ps(T1, Tangent1);
    vResult = _mm_add_ps(vResult,vTemp);
    return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorHermiteV
(
    FXMVECTOR Position0,
    FXMVECTOR Tangent0,
    FXMVECTOR Position1,
    CXMVECTOR Tangent1,
    CXMVECTOR T
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR P0;
    XMVECTOR T0;
    XMVECTOR P1;
    XMVECTOR T1;
    XMVECTOR Result;
    XMVECTOR T2;
    XMVECTOR T3;

    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
    //          (t^3 - 2 * t^2 + t) * Tangent0 +
    //          (-2 * t^3 + 3 * t^2) * Position1 +
    //          (t^3 - t^2) * Tangent1
    T2 = XMVectorMultiply(T, T);
    T3 = XMVectorMultiply(T , T2);

    P0 = XMVectorReplicate(2.0f * T3.vector4_f32[0] - 3.0f * T2.vector4_f32[0] + 1.0f);
    T0 = XMVectorReplicate(T3.vector4_f32[1] - 2.0f * T2.vector4_f32[1] + T.vector4_f32[1]);
    P1 = XMVectorReplicate(-2.0f * T3.vector4_f32[2] + 3.0f * T2.vector4_f32[2]);
    T1 = XMVectorReplicate(T3.vector4_f32[3] - T2.vector4_f32[3]);

    Result = XMVectorMultiply(P0, Position0);
    Result = XMVectorMultiplyAdd(T0, Tangent0, Result);
    Result = XMVectorMultiplyAdd(P1, Position1, Result);
    Result = XMVectorMultiplyAdd(T1, Tangent1, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 CatMulT2 = {-3.0f,-2.0f,3.0f,-1.0f};
    static const XMVECTORF32 CatMulT3 = {2.0f,1.0f,-2.0f,1.0f};

    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
    //          (t^3 - 2 * t^2 + t) * Tangent0 +
    //          (-2 * t^3 + 3 * t^2) * Position1 +
    //          (t^3 - t^2) * Tangent1
    XMVECTOR T2 = _mm_mul_ps(T,T);
    XMVECTOR T3 = _mm_mul_ps(T,T2);
    // Mul by the constants against t^2
    T2 = _mm_mul_ps(T2,CatMulT2);
    // Mul by the constants against t^3
    T3 = _mm_mul_ps(T3,CatMulT3);
    // T3 now has the pre-result.
    T3 = _mm_add_ps(T3,T2);
    // I need to add t.y only
    T2 = _mm_and_ps(T,g_XMMaskY);
    T3 = _mm_add_ps(T3,T2);
    // Add 1.0f to x
    T3 = _mm_add_ps(T3,g_XMIdentityR0);
    // Now, I have the constants created
    // Mul the x constant to Position0
    XMVECTOR vResult = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,Position0);
    // Mul the y constant to Tangent0
    T2 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(1,1,1,1));
    T2 = _mm_mul_ps(T2,Tangent0);
    vResult = _mm_add_ps(vResult,T2);
    // Mul the z constant to Position1
    T2 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(2,2,2,2));
    T2 = _mm_mul_ps(T2,Position1);
    vResult = _mm_add_ps(vResult,T2);
    // Mul the w constant to Tangent1
    T3 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(3,3,3,3));
    T3 = _mm_mul_ps(T3,Tangent1);
    vResult = _mm_add_ps(vResult,T3);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorCatmullRom
(
    FXMVECTOR Position0,
    FXMVECTOR Position1,
    FXMVECTOR Position2,
    CXMVECTOR Position3,
    FLOAT t
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR P0;
    XMVECTOR P1;
    XMVECTOR P2;
    XMVECTOR P3;
    XMVECTOR Result;
    FLOAT t2;
    FLOAT t3;

    // Result = ((-t^3 + 2 * t^2 - t) * Position0 +
    //           (3 * t^3 - 5 * t^2 + 2) * Position1 +
    //           (-3 * t^3 + 4 * t^2 + t) * Position2 +
    //           (t^3 - t^2) * Position3) * 0.5
    t2 = t * t;
    t3 = t * t2;

    P0 = XMVectorReplicate((-t3 + 2.0f * t2 - t) * 0.5f);
    P1 = XMVectorReplicate((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f);
    P2 = XMVectorReplicate((-3.0f * t3 + 4.0f * t2 + t) * 0.5f);
    P3 = XMVectorReplicate((t3 - t2) * 0.5f);

    Result = XMVectorMultiply(P0, Position0);
    Result = XMVectorMultiplyAdd(P1, Position1, Result);
    Result = XMVectorMultiplyAdd(P2, Position2, Result);
    Result = XMVectorMultiplyAdd(P3, Position3, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    FLOAT t2 = t * t;
    FLOAT t3 = t * t2;

    XMVECTOR P0 = _mm_set_ps1((-t3 + 2.0f * t2 - t) * 0.5f);
    XMVECTOR P1 = _mm_set_ps1((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f);
    XMVECTOR P2 = _mm_set_ps1((-3.0f * t3 + 4.0f * t2 + t) * 0.5f);
    XMVECTOR P3 = _mm_set_ps1((t3 - t2) * 0.5f);

    P0 = _mm_mul_ps(P0, Position0);
    P1 = _mm_mul_ps(P1, Position1);
    P2 = _mm_mul_ps(P2, Position2);
    P3 = _mm_mul_ps(P3, Position3);
    P0 = _mm_add_ps(P0,P1);
    P2 = _mm_add_ps(P2,P3);
    P0 = _mm_add_ps(P0,P2);
    return P0;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorCatmullRomV
(
    FXMVECTOR Position0,
    FXMVECTOR Position1,
    FXMVECTOR Position2,
    CXMVECTOR Position3,
    CXMVECTOR T
)
{
#if defined(_XM_NO_INTRINSICS_)
    float fx = T.vector4_f32[0];
    float fy = T.vector4_f32[1];
    float fz = T.vector4_f32[2];
    float fw = T.vector4_f32[3];
    XMVECTOR vResult = {
        0.5f*((-fx*fx*fx+2*fx*fx-fx)*Position0.vector4_f32[0]+
        (3*fx*fx*fx-5*fx*fx+2)*Position1.vector4_f32[0]+
        (-3*fx*fx*fx+4*fx*fx+fx)*Position2.vector4_f32[0]+
        (fx*fx*fx-fx*fx)*Position3.vector4_f32[0]),
        0.5f*((-fy*fy*fy+2*fy*fy-fy)*Position0.vector4_f32[1]+
        (3*fy*fy*fy-5*fy*fy+2)*Position1.vector4_f32[1]+
        (-3*fy*fy*fy+4*fy*fy+fy)*Position2.vector4_f32[1]+
        (fy*fy*fy-fy*fy)*Position3.vector4_f32[1]),
        0.5f*((-fz*fz*fz+2*fz*fz-fz)*Position0.vector4_f32[2]+
        (3*fz*fz*fz-5*fz*fz+2)*Position1.vector4_f32[2]+
        (-3*fz*fz*fz+4*fz*fz+fz)*Position2.vector4_f32[2]+
        (fz*fz*fz-fz*fz)*Position3.vector4_f32[2]),
        0.5f*((-fw*fw*fw+2*fw*fw-fw)*Position0.vector4_f32[3]+
        (3*fw*fw*fw-5*fw*fw+2)*Position1.vector4_f32[3]+
        (-3*fw*fw*fw+4*fw*fw+fw)*Position2.vector4_f32[3]+
        (fw*fw*fw-fw*fw)*Position3.vector4_f32[3])
    };
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Catmul2 = {2.0f,2.0f,2.0f,2.0f};
    static const XMVECTORF32 Catmul3 = {3.0f,3.0f,3.0f,3.0f};
    static const XMVECTORF32 Catmul4 = {4.0f,4.0f,4.0f,4.0f};
    static const XMVECTORF32 Catmul5 = {5.0f,5.0f,5.0f,5.0f};
    // Cache T^2 and T^3
    XMVECTOR T2 = _mm_mul_ps(T,T);
    XMVECTOR T3 = _mm_mul_ps(T,T2);
    // Perform the Position0 term
    XMVECTOR vResult = _mm_add_ps(T2,T2);
    vResult = _mm_sub_ps(vResult,T);
    vResult = _mm_sub_ps(vResult,T3);
    vResult = _mm_mul_ps(vResult,Position0);
    // Perform the Position1 term and add
    XMVECTOR vTemp = _mm_mul_ps(T3,Catmul3);
    XMVECTOR vTemp2 = _mm_mul_ps(T2,Catmul5);
    vTemp = _mm_sub_ps(vTemp,vTemp2);
    vTemp = _mm_add_ps(vTemp,Catmul2);
    vTemp = _mm_mul_ps(vTemp,Position1);
    vResult = _mm_add_ps(vResult,vTemp);
    // Perform the Position2 term and add
    vTemp = _mm_mul_ps(T2,Catmul4);
    vTemp2 = _mm_mul_ps(T3,Catmul3);
    vTemp = _mm_sub_ps(vTemp,vTemp2);
    vTemp = _mm_add_ps(vTemp,T);
    vTemp = _mm_mul_ps(vTemp,Position2);
    vResult = _mm_add_ps(vResult,vTemp);
    // Position3 is the last term
    T3 = _mm_sub_ps(T3,T2);
    T3 = _mm_mul_ps(T3,Position3);
    vResult = _mm_add_ps(vResult,T3);
    // Multiply by 0.5f and exit
    vResult = _mm_mul_ps(vResult,g_XMOneHalf);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorBaryCentric
(
    FXMVECTOR Position0,
    FXMVECTOR Position1,
    FXMVECTOR Position2,
    FLOAT f,
    FLOAT g
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0)
    XMVECTOR P10;
    XMVECTOR P20;
    XMVECTOR ScaleF;
    XMVECTOR ScaleG;
    XMVECTOR Result;

    P10 = XMVectorSubtract(Position1, Position0);
    ScaleF = XMVectorReplicate(f);

    P20 = XMVectorSubtract(Position2, Position0);
    ScaleG = XMVectorReplicate(g);

    Result = XMVectorMultiplyAdd(P10, ScaleF, Position0);
    Result = XMVectorMultiplyAdd(P20, ScaleG, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR R1 = _mm_sub_ps(Position1,Position0);
    XMVECTOR SF = _mm_set_ps1(f);
    XMVECTOR R2 = _mm_sub_ps(Position2,Position0);
    XMVECTOR SG = _mm_set_ps1(g);
    R1 = _mm_mul_ps(R1,SF);
    R2 = _mm_mul_ps(R2,SG);
    R1 = _mm_add_ps(R1,Position0);
    R1 = _mm_add_ps(R1,R2);
    return R1;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
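    // Note: this XM_NO_MISALIGNED_VECTOR_ACCESS branch is empty here, as it
    // is for the other scalar-argument helpers in this file.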
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorBaryCentricV
(
    FXMVECTOR Position0,
    FXMVECTOR Position1,
    FXMVECTOR Position2,
    CXMVECTOR F,
    CXMVECTOR G
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0)
    XMVECTOR P10;
    XMVECTOR P20;
    XMVECTOR Result;

    P10 = XMVectorSubtract(Position1, Position0);
    P20 = XMVectorSubtract(Position2, Position0);

    Result = XMVectorMultiplyAdd(P10, F, Position0);
    Result = XMVectorMultiplyAdd(P20, G, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR R1 = _mm_sub_ps(Position1,Position0);
    XMVECTOR R2 = _mm_sub_ps(Position2,Position0);
    R1 = _mm_mul_ps(R1,F);
    R2 = _mm_mul_ps(R2,G);
    R1 = _mm_add_ps(R1,Position0);
    R1 = _mm_add_ps(R1,R2);
    return R1;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

/****************************************************************************
 *
 * 2D Vector
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2Equal
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    // z and w are don't care
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2EqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector2EqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;

    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] == V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] != V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    // z and w are don't care
    int iTest = _mm_movemask_ps(vTemp)&3;
    UINT CR = 0;
    if (iTest==3)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2EqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2EqualIntR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector2EqualIntR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;
    if ((V1.vector4_u32[0] == V2.vector4_u32[0]) &&
        (V1.vector4_u32[1] == V2.vector4_u32[1]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) &&
        (V1.vector4_u32[1] != V2.vector4_u32[1]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    int iTest = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3;
    UINT CR = 0;
    if (iTest==3)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2NearEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT dx, dy;
    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
    return ((dx <= Epsilon.vector4_f32[0]) &&
            (dy <= Epsilon.vector4_f32[1]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    // z and w are don't care
    return (((_mm_movemask_ps(vTemp)&3)==0x3) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2NotEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    // z and w are don't care
    return (((_mm_movemask_ps(vTemp)&3)!=3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector2EqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2NotEqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3)!=3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector2EqualIntR(V1, V2));
#endif
}

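//------------------------------------------------------------------------------
// Example usage (illustrative only): the R-suffixed comparisons return a
// CR6-style mask rather than a BOOL, so one call can answer both "all equal?"
// and "none equal?" through the XMComparison* helpers.
//
//     XMVECTOR P = XMVectorSet( 1.0f, 2.0f, 0.0f, 0.0f );
//     XMVECTOR Q = XMVectorSet( 1.0f, 2.0f, 9.0f, 9.0f );  // z and w ignored
//     UINT CR = XMVector2EqualR( P, Q );
//     BOOL AllEqual  = XMComparisonAllTrue( CR );   // TRUE here
//     BOOL NoneEqual = XMComparisonAllFalse( CR );  // FALSE here
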
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2Greater
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1])) != 0);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    // z and w are don't care
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2GreaterR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector2GreaterR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;
    if ((V1.vector4_f32[0] > V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] > V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] <= V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp)&3;
    UINT CR = 0;
    if (iTest==3)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2GreaterOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2GreaterOrEqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector2GreaterOrEqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] >= V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] < V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp)&3;
    UINT CR = 0;
    if (iTest == 3)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2Less
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2GreaterR(V2, V1));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2LessOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2GreaterOrEqualR(V2, V1));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2InBounds
(
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x and y in bounds? (z and w are don't care)
    return (((_mm_movemask_ps(vTemp1)&0x3)==0x3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllInBounds(XMVector2InBoundsR(V, Bounds));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector2InBoundsR
(
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6BOUNDS;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x and y in bounds? (z and w are don't care)
    return ((_mm_movemask_ps(vTemp1)&0x3)==0x3) ? XM_CRMASK_CR6BOUNDS : 0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2IsNaN
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (XMISNAN(V.vector4_f32[0]) ||
            XMISNAN(V.vector4_f32[1]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the exponent
    __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
    // Mask off the mantissa
    __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
    // Are any of the exponents == 0x7F800000?
    vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
    // Are any of the mantissas zero? (SSE2 doesn't have a neq test)
    vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
    // Perform a not on the NaN test to be true on NON-zero mantissas
    vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
    // If x or y are NaN, the signs are true after the merge above
    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTempNan)[0])&3) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2IsInfinite
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    return (XMISINF(V.vector4_f32[0]) ||
            XMISINF(V.vector4_f32[1]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bit
    __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
    // Compare to infinity
    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
    // If x or y are infinity, the signs are true.
    return ((_mm_movemask_ps(vTemp)&3) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Dot
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_f32[0] =
    Result.vector4_f32[1] =
    Result.vector4_f32[2] =
    Result.vector4_f32[3] = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V1,V2);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Cross
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fCross = (V1.vector4_f32[0] * V2.vector4_f32[1]) - (V1.vector4_f32[1] * V2.vector4_f32[0]);
    XMVECTOR vResult = {
        fCross,
        fCross,
        fCross,
        fCross
    };
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Swap x and y
    XMVECTOR vResult = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(0,1,0,1));
    // Perform the muls
    vResult = _mm_mul_ps(vResult,V1);
    // Splat y
    XMVECTOR vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(1,1,1,1));
    // Sub the values
    vResult = _mm_sub_ss(vResult,vTemp);
    // Splat the cross product
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,0,0,0));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2LengthSq
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return XMVector2Dot(V, V);
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
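    // (The scalar sum is formed in lane x with _mm_add_ss, then the shuffle
    // below broadcasts it so every component holds the squared length,
    // matching the splatted result of the _XM_NO_INTRINSICS_ path.)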
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else
    return XMVector2Dot(V, V);
#endif
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2ReciprocalLengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector2LengthSq(V);
    Result = XMVectorReciprocalSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_rsqrt_ss(vLengthSq);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2ReciprocalLength
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector2LengthSq(V);
    Result = XMVectorReciprocalSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_sqrt_ss(vLengthSq);
    vLengthSq = _mm_div_ss(g_XMOne,vLengthSq);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2LengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result = XMVector2LengthSq(V);
    Result = XMVectorSqrtEst(Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_sqrt_ss(vLengthSq);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Length
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result = XMVector2LengthSq(V);
    Result = XMVectorSqrt(Result);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// XMVector2NormalizeEst uses a reciprocal estimate and
// returns QNaN on zero and infinite vectors.

XMFINLINE XMVECTOR XMVector2NormalizeEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result = XMVector2ReciprocalLength(V);
    Result = XMVectorMultiply(V, Result);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_rsqrt_ss(vLengthSq);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    vLengthSq = _mm_mul_ps(vLengthSq,V);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Normalize
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fLength;
    XMVECTOR vResult;

    vResult = XMVector2Length( V );
    fLength = vResult.vector4_f32[0];

    // Prevent divide by zero
    if (fLength > 0) {
        fLength = 1.0f/fLength;
    }

    vResult.vector4_f32[0] = V.vector4_f32[0]*fLength;
    vResult.vector4_f32[1] = V.vector4_f32[1]*fLength;
    vResult.vector4_f32[2] = V.vector4_f32[2]*fLength;
    vResult.vector4_f32[3] = V.vector4_f32[3]*fLength;
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y only
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Prepare for the division
    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
    // Create zero with a single instruction
    XMVECTOR vZeroMask = _mm_setzero_ps();
    // Test for a divide by zero (Must be FP to detect -0.0)
    vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
    // Failsafe on zero (Or epsilon) length planes
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Reciprocal mul to perform the normalization
    vResult = _mm_div_ps(V,vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vZeroMask);
    // Select qnan or result based on infinite length
    XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
    XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
    vResult = _mm_or_ps(vTemp1,vTemp2);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2ClampLength
(
    FXMVECTOR V,
    FLOAT LengthMin,
    FLOAT LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)
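    // Both scalar bounds are replicated across all four lanes and the work
    // is delegated to the vector-argument XMVector2ClampLengthV below.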

    XMVECTOR ClampMax;
    XMVECTOR ClampMin;

    ClampMax = XMVectorReplicate(LengthMax);
    ClampMin = XMVectorReplicate(LengthMin);

    return XMVector2ClampLengthV(V, ClampMin, ClampMax);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
    XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
    return XMVector2ClampLengthV(V, ClampMin, ClampMax);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2ClampLengthV
(
    FXMVECTOR V,
    FXMVECTOR LengthMin,
    FXMVECTOR LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]));
    XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]));
    XMASSERT(XMVector2GreaterOrEqual(LengthMin, XMVectorZero()));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, XMVectorZero()));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector2LengthSq(V);

    Zero = XMVectorZero();

    RcpLength = XMVectorReciprocalSqrt(LengthSq);

    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
    ZeroLength = XMVectorEqual(LengthSq, Zero);

    Length = XMVectorMultiply(LengthSq, RcpLength);

    Normal = XMVectorMultiply(V, RcpLength);

    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);

    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);

    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);

    Result = XMVectorMultiply(Normal, ClampLength);

    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)));
    XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)));
    XMASSERT(XMVector2GreaterOrEqual(LengthMin, g_XMZero));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, g_XMZero));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, LengthMin));
    LengthSq = XMVector2LengthSq(V);
    RcpLength = XMVectorReciprocalSqrt(LengthSq);
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
    ZeroLength = XMVectorEqual(LengthSq, g_XMZero);
    Length = _mm_mul_ps(LengthSq, RcpLength);
    Normal = _mm_mul_ps(V, RcpLength);
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);
    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);
    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
    Result = _mm_mul_ps(Normal, ClampLength);
    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Reflect
(
    FXMVECTOR Incident,
    FXMVECTOR Normal
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    Result = XMVector2Dot(Incident, Normal);
    Result = XMVectorAdd(Result, Result);
    Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    XMVECTOR Result = XMVector2Dot(Incident,Normal);
    Result = _mm_add_ps(Result, Result);
    Result = _mm_mul_ps(Result, Normal);
    Result = _mm_sub_ps(Incident,Result);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Refract
(
    FXMVECTOR Incident,
    FXMVECTOR Normal,
    FLOAT RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Index;
    Index = XMVectorReplicate(RefractionIndex);
    return XMVector2RefractV(Incident, Normal, Index);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Index = _mm_set_ps1(RefractionIndex);
    return XMVector2RefractV(Incident,Normal,Index);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Return the refraction of a 2D vector
XMFINLINE XMVECTOR XMVector2RefractV
(
    FXMVECTOR Incident,
    FXMVECTOR Normal,
    FXMVECTOR RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)
    float IDotN;
    float RX,RY;
    XMVECTOR vResult;
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
    IDotN = (Incident.vector4_f32[0]*Normal.vector4_f32[0])+(Incident.vector4_f32[1]*Normal.vector4_f32[1]);
    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    RY = 1.0f-(IDotN*IDotN);
    RX = 1.0f-(RY*RefractionIndex.vector4_f32[0]*RefractionIndex.vector4_f32[0]);
    RY = 1.0f-(RY*RefractionIndex.vector4_f32[1]*RefractionIndex.vector4_f32[1]);
    if (RX>=0.0f) {
        RX = (RefractionIndex.vector4_f32[0]*Incident.vector4_f32[0])-(Normal.vector4_f32[0]*((RefractionIndex.vector4_f32[0]*IDotN)+sqrtf(RX)));
    } else {
        RX = 0.0f;
    }
    if (RY>=0.0f) {
        RY = (RefractionIndex.vector4_f32[1]*Incident.vector4_f32[1])-(Normal.vector4_f32[1]*((RefractionIndex.vector4_f32[1]*IDotN)+sqrtf(RY)));
    } else {
        RY = 0.0f;
    }
    vResult.vector4_f32[0] = RX;
    vResult.vector4_f32[1] = RY;
    vResult.vector4_f32[2] = 0.0f;
    vResult.vector4_f32[3] = 0.0f;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
    // Get the 2D Dot product of Incident-Normal
    XMVECTOR IDotN = _mm_mul_ps(Incident,Normal);
    XMVECTOR vTemp = _mm_shuffle_ps(IDotN,IDotN,_MM_SHUFFLE(1,1,1,1));
    IDotN = _mm_add_ss(IDotN,vTemp);
    IDotN = _mm_shuffle_ps(IDotN,IDotN,_MM_SHUFFLE(0,0,0,0));
    // vTemp = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    vTemp = _mm_mul_ps(IDotN,IDotN);
    vTemp = _mm_sub_ps(g_XMOne,vTemp);
    vTemp = _mm_mul_ps(vTemp,RefractionIndex);
    vTemp = _mm_mul_ps(vTemp,RefractionIndex);
    vTemp = _mm_sub_ps(g_XMOne,vTemp);
    // If any terms are <=0, sqrt() will fail, punt to zero
    XMVECTOR vMask = _mm_cmpgt_ps(vTemp,g_XMZero);
    // R = RefractionIndex * IDotN + sqrt(R)
    vTemp = _mm_sqrt_ps(vTemp);
    XMVECTOR vResult = _mm_mul_ps(RefractionIndex,IDotN);
    vTemp = _mm_add_ps(vTemp,vResult);
    // Result = RefractionIndex * Incident - Normal * R
    vResult = _mm_mul_ps(RefractionIndex,Incident);
    vTemp = _mm_mul_ps(vTemp,Normal);
    vResult = _mm_sub_ps(vResult,vTemp);
    vResult = _mm_and_ps(vResult,vMask);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Orthogonal
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_f32[0] = -V.vector4_f32[1];
    Result.vector4_f32[1] = V.vector4_f32[0];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    vResult = _mm_mul_ps(vResult,g_XMNegateX);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2AngleBetweenNormalsEst
(
    FXMVECTOR N1,
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    Result = XMVector2Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACosEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector2Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACosEst(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2AngleBetweenNormals
(
    FXMVECTOR N1,
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)

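    // For unit-length inputs the dot product is cos(theta); the clamp guards
    // against values drifting slightly outside [-1, 1] before XMVectorACos.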
    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    Result = XMVector2Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACos(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector2Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACos(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2AngleBetweenVectors
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    L1 = XMVector2ReciprocalLength(V1);
    L2 = XMVector2ReciprocalLength(V2);

    Dot = XMVector2Dot(V1, V2);

    L1 = XMVectorMultiply(L1, L2);

    CosAngle = XMVectorMultiply(Dot, L1);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);

    Result = XMVectorACos(CosAngle);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR Result;
    L1 = XMVector2ReciprocalLength(V1);
    L2 = XMVector2ReciprocalLength(V2);
    Dot = XMVector2Dot(V1, V2);
    L1 = _mm_mul_ps(L1, L2);
    CosAngle = _mm_mul_ps(Dot, L1);
    CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne,g_XMOne);
    Result = XMVectorACos(CosAngle);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2LinePointDistance
(
    FXMVECTOR LinePoint1,
    FXMVECTOR LinePoint2,
    FXMVECTOR Point
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR PointVector;
    XMVECTOR LineVector;
    XMVECTOR ReciprocalLengthSq;
    XMVECTOR PointProjectionScale;
    XMVECTOR DistanceVector;
    XMVECTOR Result;

    // Given a vector PointVector from LinePoint1 to Point and a vector
    // LineVector from LinePoint1 to LinePoint2, the scaled distance
    // PointProjectionScale from LinePoint1 to the perpendicular projection
    // of PointVector onto the line is defined as:
    //
    //     PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector)

    PointVector = XMVectorSubtract(Point, LinePoint1);
    LineVector = XMVectorSubtract(LinePoint2, LinePoint1);

    ReciprocalLengthSq = XMVector2LengthSq(LineVector);
    ReciprocalLengthSq = XMVectorReciprocal(ReciprocalLengthSq);

    PointProjectionScale = XMVector2Dot(PointVector, LineVector);
    PointProjectionScale = XMVectorMultiply(PointProjectionScale, ReciprocalLengthSq);

    DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale);
    DistanceVector = XMVectorSubtract(PointVector, DistanceVector);

    Result = XMVector2Length(DistanceVector);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR PointVector = _mm_sub_ps(Point,LinePoint1);
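    // Same projection math as above: project PointVector onto LineVector,
    // then measure the length of the perpendicular (rejected) component.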
    XMVECTOR LineVector = _mm_sub_ps(LinePoint2,LinePoint1);
    XMVECTOR ReciprocalLengthSq = XMVector2LengthSq(LineVector);
    XMVECTOR vResult = XMVector2Dot(PointVector,LineVector);
    vResult = _mm_div_ps(vResult,ReciprocalLengthSq);
    vResult = _mm_mul_ps(vResult,LineVector);
    vResult = _mm_sub_ps(PointVector,vResult);
    vResult = XMVector2Length(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2IntersectLine
(
    FXMVECTOR Line1Point1,
    FXMVECTOR Line1Point2,
    FXMVECTOR Line2Point1,
    CXMVECTOR Line2Point2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1;
    XMVECTOR V2;
    XMVECTOR V3;
    XMVECTOR C1;
    XMVECTOR C2;
    XMVECTOR Result;
    CONST XMVECTOR Zero = XMVectorZero();

    V1 = XMVectorSubtract(Line1Point2, Line1Point1);
    V2 = XMVectorSubtract(Line2Point2, Line2Point1);
    V3 = XMVectorSubtract(Line1Point1, Line2Point1);

    C1 = XMVector2Cross(V1, V2);
    C2 = XMVector2Cross(V2, V3);

    if (XMVector2NearEqual(C1, Zero, g_XMEpsilon.v))
    {
        if (XMVector2NearEqual(C2, Zero, g_XMEpsilon.v))
        {
            // Coincident
            Result = g_XMInfinity.v;
        }
        else
        {
            // Parallel
            Result = g_XMQNaN.v;
        }
    }
    else
    {
        // Intersection point = Line1Point1 + V1 * (C2 / C1)
        XMVECTOR Scale;
        Scale = XMVectorReciprocal(C1);
        Scale = XMVectorMultiply(C2, Scale);
        Result = XMVectorMultiplyAdd(V1, Scale, Line1Point1);
    }

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1 = _mm_sub_ps(Line1Point2, Line1Point1);
    XMVECTOR V2 = _mm_sub_ps(Line2Point2, Line2Point1);
    XMVECTOR V3 = _mm_sub_ps(Line1Point1, Line2Point1);
    // Generate the cross products
    XMVECTOR C1 = XMVector2Cross(V1, V2);
    XMVECTOR C2 = XMVector2Cross(V2, V3);
    // If C1 is not close to epsilon, use the calculated value
    XMVECTOR vResultMask = _mm_setzero_ps();
    vResultMask = _mm_sub_ps(vResultMask,C1);
    vResultMask = _mm_max_ps(vResultMask,C1);
    // 0xFFFFFFFF if the calculated value is to be used
    vResultMask = _mm_cmpgt_ps(vResultMask,g_XMEpsilon);
    // If C1 is close to epsilon, which fail type is it? INFINITY or NAN?
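    // (C2 near zero as well means the lines are coincident -> INFINITY;
    // otherwise they are parallel with no intersection -> QNaN.)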
    XMVECTOR vFailMask = _mm_setzero_ps();
    vFailMask = _mm_sub_ps(vFailMask,C2);
    vFailMask = _mm_max_ps(vFailMask,C2);
    vFailMask = _mm_cmple_ps(vFailMask,g_XMEpsilon);
    XMVECTOR vFail = _mm_and_ps(vFailMask,g_XMInfinity);
    vFailMask = _mm_andnot_ps(vFailMask,g_XMQNaN);
    // vFail is NAN or INF
    vFail = _mm_or_ps(vFail,vFailMask);
    // Intersection point = Line1Point1 + V1 * (C2 / C1)
    XMVECTOR vResult = _mm_div_ps(C2,C1);
    vResult = _mm_mul_ps(vResult,V1);
    vResult = _mm_add_ps(vResult,Line1Point1);
    // Use result, or failure value
    vResult = _mm_and_ps(vResult,vResultMask);
    vResultMask = _mm_andnot_ps(vResultMask,vFail);
    vResult = _mm_or_ps(vResult,vResultMask);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Transform
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Result;

    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);

    Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vResult = _mm_add_ps(vResult,M.r[3]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMINLINE XMFLOAT4* XMVector2TransformStream
(
    XMFLOAT4* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT2* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Result;
    UINT i;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat2((XMFLOAT2*)pInputVector);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//      Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
//      X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);

        Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE* pInputVector = (const BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->x);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->y);
        vResult = _mm_mul_ps(vResult,M.r[1]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
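        // Unaligned store: the caller's OutputStride need not keep each
        // XMFLOAT4 on a 16-byte boundary.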
_mm_storeu_ps(reinterpret_cast<float*>(pOutputVector),vResult); 7083 pInputVector += InputStride; 7084 pOutputVector += OutputStride; 7085 } 7086 return pOutputStream; 7087#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) 7088#endif // _XM_VMX128_INTRINSICS_ 7089} 7090 7091//------------------------------------------------------------------------------ 7092 7093XMINLINE XMFLOAT4* XMVector2TransformStreamNC 7094( 7095 XMFLOAT4* pOutputStream, 7096 UINT OutputStride, 7097 CONST XMFLOAT2* pInputStream, 7098 UINT InputStride, 7099 UINT VectorCount, 7100 CXMMATRIX M 7101) 7102{ 7103#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_SSE_INTRINSICS_) 7104 return XMVector2TransformStream( pOutputStream, OutputStride, pInputStream, InputStride, VectorCount, M ); 7105#else // _XM_VMX128_INTRINSICS_ 7106#endif // _XM_VMX128_INTRINSICS_ 7107} 7108 7109//------------------------------------------------------------------------------ 7110 7111XMFINLINE XMVECTOR XMVector2TransformCoord 7112( 7113 FXMVECTOR V, 7114 CXMMATRIX M 7115) 7116{ 7117#if defined(_XM_NO_INTRINSICS_) 7118 7119 XMVECTOR X; 7120 XMVECTOR Y; 7121 XMVECTOR InverseW; 7122 XMVECTOR Result; 7123 7124 Y = XMVectorSplatY(V); 7125 X = XMVectorSplatX(V); 7126 7127 Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]); 7128 Result = XMVectorMultiplyAdd(X, M.r[0], Result); 7129 7130 InverseW = XMVectorSplatW(Result); 7131 InverseW = XMVectorReciprocal(InverseW); 7132 7133 Result = XMVectorMultiply(Result, InverseW); 7134 7135 return Result; 7136 7137#elif defined(_XM_SSE_INTRINSICS_) 7138 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0)); 7139 vResult = _mm_mul_ps(vResult,M.r[0]); 7140 XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1)); 7141 vTemp = _mm_mul_ps(vTemp,M.r[1]); 7142 vResult = _mm_add_ps(vResult,vTemp); 7143 vResult = _mm_add_ps(vResult,M.r[3]); 7144 vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3)); 7145 vResult = _mm_div_ps(vResult,vTemp); 7146 return vResult; 7147#else // _XM_VMX128_INTRINSICS_ 7148#endif // _XM_VMX128_INTRINSICS_ 7149} 7150 7151//------------------------------------------------------------------------------ 7152 7153XMINLINE XMFLOAT2* XMVector2TransformCoordStream 7154( 7155 XMFLOAT2* pOutputStream, 7156 UINT OutputStride, 7157 CONST XMFLOAT2* pInputStream, 7158 UINT InputStride, 7159 UINT VectorCount, 7160 CXMMATRIX M 7161) 7162{ 7163#if defined(_XM_NO_INTRINSICS_) 7164 7165 XMVECTOR V; 7166 XMVECTOR X; 7167 XMVECTOR Y; 7168 XMVECTOR InverseW; 7169 XMVECTOR Result; 7170 UINT i; 7171 BYTE* pInputVector = (BYTE*)pInputStream; 7172 BYTE* pOutputVector = (BYTE*)pOutputStream; 7173 7174 XMASSERT(pOutputStream); 7175 XMASSERT(pInputStream); 7176 7177 for (i = 0; i < VectorCount; i++) 7178 { 7179 V = XMLoadFloat2((XMFLOAT2*)pInputVector); 7180 Y = XMVectorSplatY(V); 7181 X = XMVectorSplatX(V); 7182// Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y); 7183// X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x); 7184 7185 Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]); 7186 Result = XMVectorMultiplyAdd(X, M.r[0], Result); 7187 7188 InverseW = XMVectorSplatW(Result); 7189 InverseW = XMVectorReciprocal(InverseW); 7190 7191 Result = XMVectorMultiply(Result, InverseW); 7192 7193 XMStoreFloat2((XMFLOAT2*)pOutputVector, Result); 7194 7195 pInputVector += InputStride; 7196 pOutputVector += OutputStride; 7197 } 7198 7199 return pOutputStream; 7200 7201#elif defined(_XM_SSE_INTRINSICS_) 7202 XMASSERT(pOutputStream); 7203 XMASSERT(pInputStream); 7204 UINT i; 7205 const 

//------------------------------------------------------------------------------

XMINLINE XMFLOAT2* XMVector2TransformCoordStream
(
    XMFLOAT2* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT2* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR InverseW;
    XMVECTOR Result;
    UINT i;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat2((XMFLOAT2*)pInputVector);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//      Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
//      X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);

        Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        InverseW = XMVectorSplatW(Result);
        InverseW = XMVectorReciprocal(InverseW);

        Result = XMVectorMultiply(Result, InverseW);

        XMStoreFloat2((XMFLOAT2*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE *pInputVector = (const BYTE*)pInputStream;
    BYTE *pOutputVector = (BYTE*)pOutputStream;

    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->x);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->y);
        vResult = _mm_mul_ps(vResult,M.r[1]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        X = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
        vResult = _mm_div_ps(vResult,X);
        _mm_store_sd(reinterpret_cast<double *>(pOutputVector),reinterpret_cast<__m128d *>(&vResult)[0]);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2TransformNormal
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Result;

    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);

    Result = XMVectorMultiply(Y, M.r[1]);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
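
//------------------------------------------------------------------------------
// Usage sketch (editor's illustration, not part of the library):
// XMVector2TransformNormal skips the translation row r[3], so directions are
// rotated/scaled but not moved. Names are hypothetical.
//
//     XMMATRIX m = XMMatrixTranslation( 10.0f, 0.0f, 0.0f );
//     XMVECTOR dir = XMVectorSet( 0.0f, 1.0f, 0.0f, 0.0f );
//     XMVECTOR n = XMVector2TransformNormal( dir, m ); // still (0, 1, ...)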

//------------------------------------------------------------------------------

XMINLINE XMFLOAT2* XMVector2TransformNormalStream
(
    XMFLOAT2* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT2* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Result;
    UINT i;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat2((XMFLOAT2*)pInputVector);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//      Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
//      X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);

        Result = XMVectorMultiply(Y, M.r[1]);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat2((XMFLOAT2*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE *pInputVector = (const BYTE*)pInputStream;
    BYTE *pOutputVector = (BYTE*)pOutputStream;
    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2 *>(pInputVector)->x);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2 *>(pInputVector)->y);
        vResult = _mm_mul_ps(vResult,M.r[1]);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        _mm_store_sd(reinterpret_cast<double*>(pOutputVector),reinterpret_cast<const __m128d *>(&vResult)[0]);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

/****************************************************************************
 *
 * 3D Vector
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3Equal
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1]) && (V1.vector4_f32[2] == V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3EqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector3EqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] == V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] == V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] != V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] != V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp)&7;
    UINT CR = 0;
    if (iTest==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
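
//------------------------------------------------------------------------------
// Usage sketch (editor's illustration, not part of the library): the
// R-suffixed comparisons return a CR6-style mask instead of a BOOL, so one
// call can feed both "all true" and "all false" tests:
//
//     UINT cr = XMVector3EqualR( a, b );          // a, b: hypothetical vectors
//     if (XMComparisonAllTrue( cr ))  { /* every component equal */ }
//     if (XMComparisonAllFalse( cr )) { /* no component equal */ }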

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3EqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1]) && (V1.vector4_u32[2] == V2.vector4_u32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3EqualIntR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector3EqualIntR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_u32[0] == V2.vector4_u32[0]) &&
        (V1.vector4_u32[1] == V2.vector4_u32[1]) &&
        (V1.vector4_u32[2] == V2.vector4_u32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) &&
        (V1.vector4_u32[1] != V2.vector4_u32[1]) &&
        (V1.vector4_u32[2] != V2.vector4_u32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    int iTemp = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7;
    UINT CR = 0;
    if (iTemp==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTemp)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3NearEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT dx, dy, dz;

    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
    dz = fabsf(V1.vector4_f32[2]-V2.vector4_f32[2]);
    return (((dx <= Epsilon.vector4_f32[0]) &&
            (dy <= Epsilon.vector4_f32[1]) &&
            (dz <= Epsilon.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    // w is don't care
    return (((_mm_movemask_ps(vTemp)&7)==0x7) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
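
//------------------------------------------------------------------------------
// Usage sketch (editor's illustration, not part of the library): tolerant
// comparison after a lossy computation. The tolerance is per component and
// must be supplied as a vector:
//
//     XMVECTOR eps = XMVectorReplicate( 1.0e-4f );
//     if (XMVector3NearEqual( computed, expected, eps )) // hypothetical names
//     { /* treat as equal */ }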

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3NotEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1]) || (V1.vector4_f32[2] != V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)!=7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector3EqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3NotEqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1]) || (V1.vector4_u32[2] != V2.vector4_u32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7)!=7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector3EqualIntR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3Greater
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1]) && (V1.vector4_f32[2] > V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3GreaterR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector3GreaterR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] > V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] > V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] > V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] <= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] <= V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp)&7;
    if (iTest==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3GreaterOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1]) && (V1.vector4_f32[2] >= V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3GreaterOrEqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector3GreaterOrEqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;
    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] >= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] >= V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] < V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] < V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp)&7;
    if (iTest==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3Less
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1]) && (V1.vector4_f32[2] < V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3GreaterR(V2, V1));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3LessOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1]) && (V1.vector4_f32[2] <= V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3GreaterOrEqualR(V2, V1));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3InBounds
(
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x,y and z in bounds? (w is don't care)
    return (((_mm_movemask_ps(vTemp1)&0x7)==0x7) != 0);
#else
    return XMComparisonAllInBounds(XMVector3InBoundsR(V, Bounds));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector3InBoundsR
(
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6BOUNDS;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x,y and z in bounds? (w is don't care)
    return ((_mm_movemask_ps(vTemp1)&0x7)==0x7) ? XM_CRMASK_CR6BOUNDS : 0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
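
//------------------------------------------------------------------------------
// Usage sketch (editor's illustration, not part of the library): Bounds is a
// symmetric extent, i.e. the test is -Bounds <= V <= Bounds per component, so
// a centered box test looks like:
//
//     XMVECTOR halfExtents = XMVectorSet( 1.0f, 2.0f, 3.0f, 0.0f );
//     if (XMVector3InBounds( point, halfExtents ))   // point: hypothetical
//     { /* inside the box centered at the origin */ }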

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3IsNaN
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    return (XMISNAN(V.vector4_f32[0]) ||
            XMISNAN(V.vector4_f32[1]) ||
            XMISNAN(V.vector4_f32[2]));

#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the exponent
    __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
    // Mask off the mantissa
    __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
    // Are any of the exponents == 0x7F800000?
    vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
    // Are any of the mantissas zero? (SSE2 doesn't have a neq test)
    vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
    // Perform a not on the NaN test to be true on NON-zero mantissas
    vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
    // If x, y or z are NaN, the signs are true after the merge above
    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTempNan)[0])&7) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3IsInfinite
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (XMISINF(V.vector4_f32[0]) ||
            XMISINF(V.vector4_f32[1]) ||
            XMISINF(V.vector4_f32[2]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bit
    __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
    // Compare to infinity
    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
    // If x,y or z are infinity, the signs are true.
    return ((_mm_movemask_ps(vTemp)&7) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Dot
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fValue = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1] + V1.vector4_f32[2] * V2.vector4_f32[2];
    XMVECTOR vResult = {
        fValue,
        fValue,
        fValue,
        fValue
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(V1,V2);
    // x=Dot.y, y=Dot.z
    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.x = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.z
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.x = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    return _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
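
//------------------------------------------------------------------------------
// Usage sketch (editor's illustration, not part of the library): the dot
// product is replicated into all four lanes, so either keep it vectorized or
// pull out a scalar with XMVectorGetX:
//
//     XMVECTOR d = XMVector3Dot( a, b );      // a, b: hypothetical vectors
//     FLOAT cosTheta = XMVectorGetX( d );     // assumes a and b are normalized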

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Cross
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {
        (V1.vector4_f32[1] * V2.vector4_f32[2]) - (V1.vector4_f32[2] * V2.vector4_f32[1]),
        (V1.vector4_f32[2] * V2.vector4_f32[0]) - (V1.vector4_f32[0] * V2.vector4_f32[2]),
        (V1.vector4_f32[0] * V2.vector4_f32[1]) - (V1.vector4_f32[1] * V2.vector4_f32[0]),
        0.0f
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // y1,z1,x1,w1
    XMVECTOR vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(3,0,2,1));
    // z2,x2,y2,w2
    XMVECTOR vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(3,1,0,2));
    // Perform the left operation
    XMVECTOR vResult = _mm_mul_ps(vTemp1,vTemp2);
    // z1,x1,y1,w1
    vTemp1 = _mm_shuffle_ps(vTemp1,vTemp1,_MM_SHUFFLE(3,0,2,1));
    // y2,z2,x2,w2
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(3,1,0,2));
    // Perform the right operation
    vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
    // Subtract the right from left, and return answer
    vResult = _mm_sub_ps(vResult,vTemp1);
    // Set w to zero
    return _mm_and_ps(vResult,g_XMMask3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
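
//------------------------------------------------------------------------------
// Usage sketch (editor's illustration, not part of the library): the cross
// product of the x and y axes yields the z axis, and the result's w is
// forced to zero:
//
//     XMVECTOR xAxis = XMVectorSet( 1.0f, 0.0f, 0.0f, 0.0f );
//     XMVECTOR yAxis = XMVectorSet( 0.0f, 1.0f, 0.0f, 0.0f );
//     XMVECTOR zAxis = XMVector3Cross( xAxis, yAxis );  // (0, 0, 1, 0)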

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3LengthSq
(
    FXMVECTOR V
)
{
    return XMVector3Dot(V, V);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3ReciprocalLengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorReciprocalSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and y
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
    // x+z, y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // y,y,y,y
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // x+z+y,??,??,??
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Get the reciprocal
    vLengthSq = _mm_rsqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3ReciprocalLength
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorReciprocalSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(V,V);
    // x=Dot.y, y=Dot.z
    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.x = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.z
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.x = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
    // Get the length
    vDot = _mm_sqrt_ps(vDot);
    // Get the reciprocal
    vDot = _mm_div_ps(g_XMOne,vDot);
    return vDot;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3LengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and y
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
    // x+z, y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // y,y,y,y
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // x+z+y,??,??,??
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Get the length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Length
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and y
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
    // x+z, y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // y,y,y,y
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // x+z+y,??,??,??
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Get the length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// XMVector3NormalizeEst uses a reciprocal estimate and
// returns QNaN on zero and infinite vectors.

XMFINLINE XMVECTOR XMVector3NormalizeEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result = XMVector3ReciprocalLength(V);
    Result = XMVectorMultiply(V, Result);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(V,V);
    // x=Dot.y, y=Dot.z
    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.x = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.z
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.x = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
    // Get the reciprocal
    vDot = _mm_rsqrt_ps(vDot);
    // Perform the normalization
    vDot = _mm_mul_ps(vDot,V);
    return vDot;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Normalize
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fLength;
    XMVECTOR vResult;

    vResult = XMVector3Length( V );
    fLength = vResult.vector4_f32[0];

    // Prevent divide by zero
    if (fLength > 0) {
        fLength = 1.0f/fLength;
    }

    vResult.vector4_f32[0] = V.vector4_f32[0]*fLength;
    vResult.vector4_f32[1] = V.vector4_f32[1]*fLength;
    vResult.vector4_f32[2] = V.vector4_f32[2]*fLength;
    vResult.vector4_f32[3] = V.vector4_f32[3]*fLength;
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z only
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Prepare for the division
    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
    // Create zero with a single instruction
    XMVECTOR vZeroMask = _mm_setzero_ps();
    // Test for a divide by zero (Must be FP to detect -0.0)
    vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
    // Failsafe on zero (Or epsilon) length vectors
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(V,vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vZeroMask);
    // Select qnan or result based on infinite length
    XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
    XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
    vResult = _mm_or_ps(vTemp1,vTemp2);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
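
//------------------------------------------------------------------------------
// Usage sketch (editor's illustration, not part of the library): the full
// XMVector3Normalize handles zero-length input (the SSE path above yields
// zero), while the Est variant trades that robustness and some precision for
// speed:
//
//     XMVECTOR dir = XMVector3Normalize( XMVectorSubtract( target, eye ) );
//     // target, eye: hypothetical positions; dir has unit length unless
//     // target == eye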

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3ClampLength
(
    FXMVECTOR V,
    FLOAT LengthMin,
    FLOAT LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampMax;
    XMVECTOR ClampMin;

    ClampMax = XMVectorReplicate(LengthMax);
    ClampMin = XMVectorReplicate(LengthMin);

    return XMVector3ClampLengthV(V, ClampMin, ClampMax);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
    XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
    return XMVector3ClampLengthV(V,ClampMin,ClampMax);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3ClampLengthV
(
    FXMVECTOR V,
    FXMVECTOR LengthMin,
    FXMVECTOR LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[2] == LengthMin.vector4_f32[0]));
    XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[2] == LengthMax.vector4_f32[0]));
    XMASSERT(XMVector3GreaterOrEqual(LengthMin, XMVectorZero()));
    XMASSERT(XMVector3GreaterOrEqual(LengthMax, XMVectorZero()));
    XMASSERT(XMVector3GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector3LengthSq(V);

    Zero = XMVectorZero();

    RcpLength = XMVectorReciprocalSqrt(LengthSq);

    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
    ZeroLength = XMVectorEqual(LengthSq, Zero);

    Normal = XMVectorMultiply(V, RcpLength);

    Length = XMVectorMultiply(LengthSq, RcpLength);

    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);

    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);

    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);

    Result = XMVectorMultiply(Normal, ClampLength);

    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)));
    XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)));
    XMASSERT(XMVector3GreaterOrEqual(LengthMin, g_XMZero));
    XMASSERT(XMVector3GreaterOrEqual(LengthMax, g_XMZero));
    XMASSERT(XMVector3GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector3LengthSq(V);
    RcpLength = XMVectorReciprocalSqrt(LengthSq);
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
    ZeroLength = XMVectorEqual(LengthSq,g_XMZero);
    Normal = _mm_mul_ps(V, RcpLength);
    Length = _mm_mul_ps(LengthSq, RcpLength);
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);
    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);
    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
    Result = _mm_mul_ps(Normal, ClampLength);
    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Reflect
(
    FXMVECTOR Incident,
    FXMVECTOR Normal
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    Result = XMVector3Dot(Incident, Normal);
    Result = XMVectorAdd(Result, Result);
    Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    XMVECTOR Result = XMVector3Dot(Incident, Normal);
    Result = _mm_add_ps(Result, Result);
    Result = _mm_mul_ps(Result, Normal);
    Result = _mm_sub_ps(Incident,Result);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Refract
(
    FXMVECTOR Incident,
    FXMVECTOR Normal,
    FLOAT RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Index;
    Index = XMVectorReplicate(RefractionIndex);
    return XMVector3RefractV(Incident, Normal, Index);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Index = _mm_set_ps1(RefractionIndex);
    return XMVector3RefractV(Incident,Normal,Index);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
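
//------------------------------------------------------------------------------
// Usage sketch (editor's illustration, not part of the library): per the
// formula in XMVector3Reflect above, reflecting a falling direction off a
// floor with unit normal (0,1,0) flips the y component:
//
//     XMVECTOR incident = XMVectorSet( 1.0f, -1.0f, 0.0f, 0.0f );
//     XMVECTOR floorN   = XMVectorSet( 0.0f,  1.0f, 0.0f, 0.0f );
//     XMVECTOR bounced  = XMVector3Reflect( incident, floorN ); // (1, 1, 0, 0)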

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3RefractV
(
    FXMVECTOR Incident,
    FXMVECTOR Normal,
    FXMVECTOR RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR IDotN;
    XMVECTOR R;
    CONST XMVECTOR Zero = XMVectorZero();

    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))

    IDotN = XMVector3Dot(Incident, Normal);

    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v);
    R = XMVectorMultiply(R, RefractionIndex);
    R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v);

    if (XMVector4LessOrEqual(R, Zero))
    {
        // Total internal reflection
        return Zero;
    }
    else
    {
        XMVECTOR Result;

        // R = RefractionIndex * IDotN + sqrt(R)
        R = XMVectorSqrt(R);
        R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R);

        // Result = RefractionIndex * Incident - Normal * R
        Result = XMVectorMultiply(RefractionIndex, Incident);
        Result = XMVectorNegativeMultiplySubtract(Normal, R, Result);

        return Result;
    }

#elif defined(_XM_SSE_INTRINSICS_)
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
    XMVECTOR IDotN = XMVector3Dot(Incident, Normal);
    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    XMVECTOR R = _mm_mul_ps(IDotN, IDotN);
    R = _mm_sub_ps(g_XMOne,R);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_sub_ps(g_XMOne,R);

    XMVECTOR vResult = _mm_cmple_ps(R,g_XMZero);
    if (_mm_movemask_ps(vResult)==0x0f)
    {
        // Total internal reflection
        vResult = g_XMZero;
    }
    else
    {
        // R = RefractionIndex * IDotN + sqrt(R)
        R = _mm_sqrt_ps(R);
        vResult = _mm_mul_ps(RefractionIndex,IDotN);
        R = _mm_add_ps(R,vResult);
        // Result = RefractionIndex * Incident - Normal * R
        vResult = _mm_mul_ps(RefractionIndex, Incident);
        R = _mm_mul_ps(R,Normal);
        vResult = _mm_sub_ps(vResult,R);
    }
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Orthogonal
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR NegativeV;
    XMVECTOR Z, YZYY;
    XMVECTOR ZIsNegative, YZYYIsNegative;
    XMVECTOR S, D;
    XMVECTOR R0, R1;
    XMVECTOR Select;
    XMVECTOR Zero;
    XMVECTOR Result;
    static CONST XMVECTORU32 Permute1X0X0X0X = {XM_PERMUTE_1X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
    static CONST XMVECTORU32 Permute0Y0Z0Y0Y = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Y};

    Zero = XMVectorZero();
    Z = XMVectorSplatZ(V);
    YZYY = XMVectorPermute(V, V, Permute0Y0Z0Y0Y.v);

    NegativeV = XMVectorSubtract(Zero, V);

    ZIsNegative = XMVectorLess(Z, Zero);
    YZYYIsNegative = XMVectorLess(YZYY, Zero);

    S = XMVectorAdd(YZYY, Z);
    D = XMVectorSubtract(YZYY, Z);

    Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative);

    R0 = XMVectorPermute(NegativeV, S, Permute1X0X0X0X.v);
    R1 = XMVectorPermute(V, D, Permute1X0X0X0X.v);

    Result = XMVectorSelect(R1, R0, Select);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR NegativeV;
    XMVECTOR Z, YZYY;
    XMVECTOR ZIsNegative, YZYYIsNegative;
    XMVECTOR S, D;
    XMVECTOR R0, R1;
    XMVECTOR Select;
    XMVECTOR Zero;
    XMVECTOR Result;
    static CONST XMVECTORI32 Permute1X0X0X0X = {XM_PERMUTE_1X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
    static CONST XMVECTORI32 Permute0Y0Z0Y0Y = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Y};

    Zero = XMVectorZero();
    Z = XMVectorSplatZ(V);
    YZYY = XMVectorPermute(V, V, Permute0Y0Z0Y0Y);

    NegativeV = _mm_sub_ps(Zero, V);

    ZIsNegative = XMVectorLess(Z, Zero);
    YZYYIsNegative = XMVectorLess(YZYY, Zero);

    S = _mm_add_ps(YZYY, Z);
    D = _mm_sub_ps(YZYY, Z);

    Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative);

    R0 = XMVectorPermute(NegativeV, S, Permute1X0X0X0X);
    R1 = XMVectorPermute(V, D, Permute1X0X0X0X);
    Result = XMVectorSelect(R1, R0, Select);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3AngleBetweenNormalsEst
(
    FXMVECTOR N1,
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    XMVECTOR NegativeOne;
    XMVECTOR One;

    Result = XMVector3Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACosEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector3Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACosEst(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3AngleBetweenNormals
(
    FXMVECTOR N1,
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    XMVECTOR NegativeOne;
    XMVECTOR One;

    Result = XMVector3Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACos(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector3Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACos(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3AngleBetweenVectors
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    L1 = XMVector3ReciprocalLength(V1);
    L2 = XMVector3ReciprocalLength(V2);

    Dot = XMVector3Dot(V1, V2);

    L1 = XMVectorMultiply(L1, L2);

    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();

    CosAngle = XMVectorMultiply(Dot, L1);

    CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);

    Result = XMVectorACos(CosAngle);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR Result;

    L1 = XMVector3ReciprocalLength(V1);
    L2 = XMVector3ReciprocalLength(V2);
    Dot = XMVector3Dot(V1, V2);
    L1 = _mm_mul_ps(L1, L2);
    CosAngle = _mm_mul_ps(Dot, L1);
    CosAngle = XMVectorClamp(CosAngle,g_XMNegativeOne,g_XMOne);
    Result = XMVectorACos(CosAngle);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
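
//------------------------------------------------------------------------------
// Usage sketch (editor's illustration, not part of the library): unlike the
// Normals variants, XMVector3AngleBetweenVectors does not require unit-length
// inputs; it normalizes internally:
//
//     XMVECTOR a = XMVectorSet( 2.0f, 0.0f, 0.0f, 0.0f );
//     XMVECTOR b = XMVectorSet( 0.0f, 3.0f, 0.0f, 0.0f );
//     FLOAT radians = XMVectorGetX( XMVector3AngleBetweenVectors( a, b ) );
//     // radians ~= XM_PIDIV2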

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3LinePointDistance
(
    FXMVECTOR LinePoint1,
    FXMVECTOR LinePoint2,
    FXMVECTOR Point
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR PointVector;
    XMVECTOR LineVector;
    XMVECTOR ReciprocalLengthSq;
    XMVECTOR PointProjectionScale;
    XMVECTOR DistanceVector;
    XMVECTOR Result;

    // Given a vector PointVector from LinePoint1 to Point and a vector
    // LineVector from LinePoint1 to LinePoint2, the scaled distance
    // PointProjectionScale from LinePoint1 to the perpendicular projection
    // of PointVector onto the line is defined as:
    //
    //     PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector)

    PointVector = XMVectorSubtract(Point, LinePoint1);
    LineVector = XMVectorSubtract(LinePoint2, LinePoint1);

    ReciprocalLengthSq = XMVector3LengthSq(LineVector);
    ReciprocalLengthSq = XMVectorReciprocal(ReciprocalLengthSq);

    PointProjectionScale = XMVector3Dot(PointVector, LineVector);
    PointProjectionScale = XMVectorMultiply(PointProjectionScale, ReciprocalLengthSq);

    DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale);
    DistanceVector = XMVectorSubtract(PointVector, DistanceVector);

    Result = XMVector3Length(DistanceVector);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR PointVector = _mm_sub_ps(Point,LinePoint1);
    XMVECTOR LineVector = _mm_sub_ps(LinePoint2,LinePoint1);
    XMVECTOR ReciprocalLengthSq = XMVector3LengthSq(LineVector);
    XMVECTOR vResult = XMVector3Dot(PointVector,LineVector);
    vResult = _mm_div_ps(vResult,ReciprocalLengthSq);
    vResult = _mm_mul_ps(vResult,LineVector);
    vResult = _mm_sub_ps(PointVector,vResult);
    vResult = XMVector3Length(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMVector3ComponentsFromNormal
(
    XMVECTOR* pParallel,
    XMVECTOR* pPerpendicular,
    FXMVECTOR V,
    FXMVECTOR Normal
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Parallel;
    XMVECTOR Scale;

    XMASSERT(pParallel);
    XMASSERT(pPerpendicular);

    Scale = XMVector3Dot(V, Normal);

    Parallel = XMVectorMultiply(Normal, Scale);

    *pParallel = Parallel;
    *pPerpendicular = XMVectorSubtract(V, Parallel);

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pParallel);
    XMASSERT(pPerpendicular);
    XMVECTOR Scale = XMVector3Dot(V, Normal);
    XMVECTOR Parallel = _mm_mul_ps(Normal,Scale);
    *pParallel = Parallel;
    *pPerpendicular = _mm_sub_ps(V,Parallel);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
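
//------------------------------------------------------------------------------
// Usage sketch (editor's illustration, not part of the library): splitting a
// velocity into components along and across a surface normal. The projection
// above multiplies by dot(V, Normal) directly, so Normal should be unit
// length:
//
//     XMVECTOR along, across;   // hypothetical outputs
//     XMVector3ComponentsFromNormal( &along, &across, velocity, surfaceNormal );
//     // velocity == along + across, with along parallel to surfaceNormal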

//------------------------------------------------------------------------------
// Transform a vector using a rotation expressed as a unit quaternion

XMFINLINE XMVECTOR XMVector3Rotate
(
    FXMVECTOR V,
    FXMVECTOR RotationQuaternion
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR A;
    XMVECTOR Q;
    XMVECTOR Result;

    A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v);
    Q = XMQuaternionConjugate(RotationQuaternion);
    Result = XMQuaternionMultiply(Q, A);
    Result = XMQuaternionMultiply(Result, RotationQuaternion);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR A;
    XMVECTOR Q;
    XMVECTOR Result;

    A = _mm_and_ps(V,g_XMMask3);
    Q = XMQuaternionConjugate(RotationQuaternion);
    Result = XMQuaternionMultiply(Q, A);
    Result = XMQuaternionMultiply(Result, RotationQuaternion);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Transform a vector using the inverse of a rotation expressed as a unit quaternion

XMFINLINE XMVECTOR XMVector3InverseRotate
(
    FXMVECTOR V,
    FXMVECTOR RotationQuaternion
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR A;
    XMVECTOR Q;
    XMVECTOR Result;

    A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v);
    Result = XMQuaternionMultiply(RotationQuaternion, A);
    Q = XMQuaternionConjugate(RotationQuaternion);
    Result = XMQuaternionMultiply(Result, Q);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR A;
    XMVECTOR Q;
    XMVECTOR Result;
    A = _mm_and_ps(V,g_XMMask3);
    Result = XMQuaternionMultiply(RotationQuaternion, A);
    Q = XMQuaternionConjugate(RotationQuaternion);
    Result = XMQuaternionMultiply(Result, Q);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
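
//------------------------------------------------------------------------------
// Usage sketch (editor's illustration, not part of the library): rotating a
// vector by a unit quaternion and undoing it with the inverse rotation:
//
//     XMVECTOR q = XMQuaternionRotationAxis( XMVectorSet( 0.0f, 0.0f, 1.0f, 0.0f ),
//                                            XM_PIDIV2 );
//     XMVECTOR r = XMVector3Rotate( XMVectorSet( 1.0f, 0.0f, 0.0f, 0.0f ), q );
//     XMVECTOR back = XMVector3InverseRotate( r, q );  // ~(1, 0, 0) again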

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Transform
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR Result;

    Z = XMVectorSplatZ(V);
    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);

    Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    vTemp = _mm_mul_ps(vTemp,M.r[2]);
    vResult = _mm_add_ps(vResult,vTemp);
    vResult = _mm_add_ps(vResult,M.r[3]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMINLINE XMFLOAT4* XMVector3TransformStream
(
    XMFLOAT4* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT3* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR Result;
    UINT i;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
        Z = XMVectorSplatZ(V);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);

        Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE* pInputVector = (const BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
        XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
        vResult = _mm_mul_ps(vResult,M.r[2]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        Y = _mm_mul_ps(Y,M.r[1]);
        vResult = _mm_add_ps(vResult,Y);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        _mm_storeu_ps(reinterpret_cast<float *>(pOutputVector),vResult);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMINLINE XMFLOAT4* XMVector3TransformStreamNC
(
    XMFLOAT4* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT3* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_SSE_INTRINSICS_)
    return XMVector3TransformStream( pOutputStream, OutputStride, pInputStream, InputStride, VectorCount, M );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3TransformCoord
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR InverseW;
    XMVECTOR Result;

    Z = XMVectorSplatZ(V);
    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);

    Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    InverseW = XMVectorSplatW(Result);
    InverseW = XMVectorReciprocal(InverseW);

    Result = XMVectorMultiply(Result, InverseW);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    vTemp = _mm_mul_ps(vTemp,M.r[2]);
    vResult = _mm_add_ps(vResult,vTemp);
    vResult = _mm_add_ps(vResult,M.r[3]);
    vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
    vResult = _mm_div_ps(vResult,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMINLINE XMFLOAT3* XMVector3TransformCoordStream
(
    XMFLOAT3* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT3* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR InverseW;
    XMVECTOR Result;
    UINT i;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
        Z = XMVectorSplatZ(V);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//      Z = XMVectorReplicate(((XMFLOAT3*)pInputVector)->z);
//      Y = XMVectorReplicate(((XMFLOAT3*)pInputVector)->y);
//      X = XMVectorReplicate(((XMFLOAT3*)pInputVector)->x);

        Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        InverseW = XMVectorSplatW(Result);
        InverseW = XMVectorReciprocal(InverseW);

        Result = XMVectorMultiply(Result, InverseW);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    UINT i;
    const BYTE *pInputVector = (const BYTE*)pInputStream;
    BYTE *pOutputVector = (BYTE*)pOutputStream;

    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
        XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
        vResult = _mm_mul_ps(vResult,M.r[2]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        Y = _mm_mul_ps(Y,M.r[1]);
        vResult = _mm_add_ps(vResult,Y);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);

        X = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
        vResult = _mm_div_ps(vResult,X);
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->x,vResult);
        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->y,vResult);
        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->z,vResult);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3TransformNormal
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR Result;

    Z = XMVectorSplatZ(V);
    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);

    Result = XMVectorMultiply(Z, M.r[2]);
    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    vTemp = _mm_mul_ps(vTemp,M.r[2]);
    vResult = _mm_add_ps(vResult,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVector3Project
(
    FXMVECTOR V,
    FLOAT ViewportX,
    FLOAT ViewportY,
    FLOAT ViewportWidth,
    FLOAT ViewportHeight,
    FLOAT ViewportMinZ,
    FLOAT ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    FLOAT HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT HalfViewportHeight = ViewportHeight * 0.5f;

    Scale = XMVectorSet(HalfViewportWidth,
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        0.0f);

    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                         ViewportY + HalfViewportHeight,
                         ViewportMinZ,
                         0.0f);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);

    Result = XMVector3TransformCoord(V, Transform);

    Result = XMVectorMultiplyAdd(Result, Scale, Offset);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    FLOAT HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT HalfViewportHeight = ViewportHeight * 0.5f;

    Scale = XMVectorSet(HalfViewportWidth,
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        0.0f);

    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                         ViewportY + HalfViewportHeight,
                         ViewportMinZ,
                         0.0f);
    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Result = XMVector3TransformCoord(V, Transform);
    Result = _mm_mul_ps(Result,Scale);
    Result = _mm_add_ps(Result,Offset);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

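//------------------------------------------------------------------------------
// A minimal sketch of the viewport mapping performed by XMVector3Project
// (ExampleViewportMapping is illustrative, not a library function). With
// identity world/view/projection matrices the input is already a normalized
// device coordinate, so (0,0,0) lands at the center of an assumed 640x480
// viewport: (320, 240, ViewportMinZ).

XMFINLINE XMVECTOR ExampleViewportMapping()
{
    XMMATRIX I = XMMatrixIdentity();
    return XMVector3Project(XMVectorSet(0.0f, 0.0f, 0.0f, 1.0f),
                            0.0f, 0.0f, 640.0f, 480.0f, 0.0f, 1.0f,
                            I, I, I);
}
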
//------------------------------------------------------------------------------

XMINLINE XMFLOAT3* XMVector3ProjectStream
(
    XMFLOAT3* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT3* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    FLOAT ViewportX,
    FLOAT ViewportY,
    FLOAT ViewportWidth,
    FLOAT ViewportHeight,
    FLOAT ViewportMinZ,
    FLOAT ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX Transform;
    XMVECTOR V;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    UINT i;
    FLOAT HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT HalfViewportHeight = ViewportHeight * 0.5f;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    Scale = XMVectorSet(HalfViewportWidth,
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);

    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                         ViewportY + HalfViewportHeight,
                         ViewportMinZ,
                         0.0f);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);

        Result = XMVector3TransformCoord(V, Transform);

        Result = XMVectorMultiplyAdd(Result, Scale, Offset);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    XMMATRIX Transform;
    XMVECTOR V;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    UINT i;
    FLOAT HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT HalfViewportHeight = ViewportHeight * 0.5f;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    Scale = XMVectorSet(HalfViewportWidth,
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);

    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                         ViewportY + HalfViewportHeight,
                         ViewportMinZ,
                         0.0f);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);

        Result = XMVector3TransformCoord(V, Transform);

        Result = _mm_mul_ps(Result,Scale);
        Result = _mm_add_ps(Result,Offset);
        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Unproject
(
    FXMVECTOR V,
    FLOAT ViewportX,
    FLOAT ViewportY,
    FLOAT ViewportWidth,
    FLOAT ViewportHeight,
    FLOAT ViewportMinZ,
    FLOAT ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Determinant;
    XMVECTOR Result;
    CONST XMVECTOR D = XMVectorSet(-1.0f, 1.0f, 0.0f, 0.0f);

    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);

    Offset = XMVectorSet(-ViewportX,
                         -ViewportY,
                         -ViewportMinZ,
                         0.0f);
    Offset = XMVectorMultiplyAdd(Scale, Offset, D);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Transform = XMMatrixInverse(&Determinant, Transform);

    Result = XMVectorMultiplyAdd(V, Scale, Offset);

    Result = XMVector3TransformCoord(Result, Transform);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Determinant;
    XMVECTOR Result;
    CONST XMVECTORF32 D = {-1.0f, 1.0f, 0.0f, 0.0f};

    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);

    Offset = XMVectorSet(-ViewportX,
                         -ViewportY,
                         -ViewportMinZ,
                         0.0f);
    Offset = _mm_mul_ps(Offset,Scale);
    Offset = _mm_add_ps(Offset,D);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Transform = XMMatrixInverse(&Determinant, Transform);

    Result = _mm_mul_ps(V,Scale);
    Result = _mm_add_ps(Result,Offset);

    Result = XMVector3TransformCoord(Result, Transform);

    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

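//------------------------------------------------------------------------------
// Typical use of XMVector3Unproject: building a world-space pick ray by
// unprojecting one screen point at both ends of the depth range. The helper
// and the 640x480 viewport are illustrative assumptions.

XMFINLINE VOID ExampleScreenPointToRay(FLOAT ScreenX, FLOAT ScreenY,
                                       CXMMATRIX Projection, CXMMATRIX View,
                                       XMVECTOR* pOrigin, XMVECTOR* pDirection)
{
    XMMATRIX World = XMMatrixIdentity();
    XMVECTOR Near = XMVector3Unproject(XMVectorSet(ScreenX, ScreenY, 0.0f, 1.0f),
                                       0.0f, 0.0f, 640.0f, 480.0f, 0.0f, 1.0f,
                                       Projection, View, World);
    XMVECTOR Far  = XMVector3Unproject(XMVectorSet(ScreenX, ScreenY, 1.0f, 1.0f),
                                       0.0f, 0.0f, 640.0f, 480.0f, 0.0f, 1.0f,
                                       Projection, View, World);
    *pOrigin = Near;
    *pDirection = XMVector3Normalize(XMVectorSubtract(Far, Near));
}
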
//------------------------------------------------------------------------------

XMINLINE XMFLOAT3* XMVector3UnprojectStream
(
    XMFLOAT3* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT3* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    FLOAT ViewportX,
    FLOAT ViewportY,
    FLOAT ViewportWidth,
    FLOAT ViewportHeight,
    FLOAT ViewportMinZ,
    FLOAT ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR V;
    XMVECTOR Determinant;
    XMVECTOR Result;
    UINT i;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;
    CONST XMVECTOR D = XMVectorSet(-1.0f, 1.0f, 0.0f, 0.0f);

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);

    Offset = XMVectorSet(-ViewportX,
                         -ViewportY,
                         -ViewportMinZ,
                         0.0f);
    Offset = XMVectorMultiplyAdd(Scale, Offset, D);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Transform = XMMatrixInverse(&Determinant, Transform);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);

        Result = XMVectorMultiplyAdd(V, Scale, Offset);

        Result = XMVector3TransformCoord(Result, Transform);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR V;
    XMVECTOR Determinant;
    XMVECTOR Result;
    UINT i;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;
    CONST XMVECTORF32 D = {-1.0f, 1.0f, 0.0f, 0.0f};

    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);

    Offset = XMVectorSet(-ViewportX,
                         -ViewportY,
                         -ViewportMinZ,
                         0.0f);
    Offset = _mm_mul_ps(Offset,Scale);
    Offset = _mm_add_ps(Offset,D);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Transform = XMMatrixInverse(&Determinant, Transform);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);

        Result = XMVectorMultiplyAdd(V, Scale, Offset);

        Result = XMVector3TransformCoord(Result, Transform);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

/****************************************************************************
 *
 * 4D Vector
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4Equal
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1]) && (V1.vector4_f32[2] == V2.vector4_f32[2]) && (V1.vector4_f32[3] == V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4EqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4EqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;

    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] == V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] == V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] == V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] != V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] != V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] != V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp);
    UINT CR = 0;
    if (iTest==0xf)     // All equal?
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (iTest==0)  // All not equal?
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4EqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1]) && (V1.vector4_u32[2] == V2.vector4_u32[2]) && (V1.vector4_u32[3] == V2.vector4_u32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])==0xf) != 0);
#else
    return XMComparisonAllTrue(XMVector4EqualIntR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4EqualIntR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if (V1.vector4_u32[0] == V2.vector4_u32[0] &&
        V1.vector4_u32[1] == V2.vector4_u32[1] &&
        V1.vector4_u32[2] == V2.vector4_u32[2] &&
        V1.vector4_u32[3] == V2.vector4_u32[3])
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (V1.vector4_u32[0] != V2.vector4_u32[0] &&
        V1.vector4_u32[1] != V2.vector4_u32[1] &&
        V1.vector4_u32[2] != V2.vector4_u32[2] &&
        V1.vector4_u32[3] != V2.vector4_u32[3])
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    int iTest = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0]);
    UINT CR = 0;
    if (iTest==0xf)     // All equal?
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (iTest==0)  // All not equal?
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

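//------------------------------------------------------------------------------
// The R-suffixed comparisons above return a CR6-style mask instead of a BOOL,
// so one comparison can answer several questions. ExampleCompareOnce is an
// illustrative helper, not part of the library.

XMFINLINE INT ExampleCompareOnce(FXMVECTOR V1, FXMVECTOR V2)
{
    UINT CR = XMVector4EqualR(V1, V2);
    if (XMComparisonAllTrue(CR))
        return 1;   // every component matched
    if (XMComparisonAllFalse(CR))
        return -1;  // every component differed
    return 0;       // mixed result
}
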
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4NearEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT dx, dy, dz, dw;

    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
    dz = fabsf(V1.vector4_f32[2]-V2.vector4_f32[2]);
    dw = fabsf(V1.vector4_f32[3]-V2.vector4_f32[3]);
    return (((dx <= Epsilon.vector4_f32[0]) &&
            (dy <= Epsilon.vector4_f32[1]) &&
            (dz <= Epsilon.vector4_f32[2]) &&
            (dw <= Epsilon.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    return ((_mm_movemask_ps(vTemp)==0xf) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

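//------------------------------------------------------------------------------
// Exact floating-point equality is brittle after arithmetic; the epsilon
// comparison above takes a per-component tolerance vector. A sketch with an
// assumed 1.0e-4f tolerance (ExampleRoughlyEqual is illustrative):

XMFINLINE BOOL ExampleRoughlyEqual(FXMVECTOR V1, FXMVECTOR V2)
{
    return XMVector4NearEqual(V1, V2, XMVectorReplicate(1.0e-4f));
}
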
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4NotEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1]) || (V1.vector4_f32[2] != V2.vector4_f32[2]) || (V1.vector4_f32[3] != V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpneq_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)) != 0);
#else
    return XMComparisonAnyFalse(XMVector4EqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4NotEqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1]) || (V1.vector4_u32[2] != V2.vector4_u32[2]) || (V1.vector4_u32[3] != V2.vector4_u32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])!=0xF) != 0);
#else
    return XMComparisonAnyFalse(XMVector4EqualIntR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4Greater
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1]) && (V1.vector4_f32[2] > V2.vector4_f32[2]) && (V1.vector4_f32[3] > V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4GreaterR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if (V1.vector4_f32[0] > V2.vector4_f32[0] &&
        V1.vector4_f32[1] > V2.vector4_f32[1] &&
        V1.vector4_f32[2] > V2.vector4_f32[2] &&
        V1.vector4_f32[3] > V2.vector4_f32[3])
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (V1.vector4_f32[0] <= V2.vector4_f32[0] &&
        V1.vector4_f32[1] <= V2.vector4_f32[1] &&
        V1.vector4_f32[2] <= V2.vector4_f32[2] &&
        V1.vector4_f32[3] <= V2.vector4_f32[3])
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    UINT CR = 0;
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0xf)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4GreaterOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1]) && (V1.vector4_f32[2] >= V2.vector4_f32[2]) && (V1.vector4_f32[3] >= V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4GreaterOrEqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] >= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] >= V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] >= V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] < V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] < V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] < V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    UINT CR = 0;
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0x0f)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4Less
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1]) && (V1.vector4_f32[2] < V2.vector4_f32[2]) && (V1.vector4_f32[3] < V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterR(V2, V1));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4LessOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1]) && (V1.vector4_f32[2] <= V2.vector4_f32[2]) && (V1.vector4_f32[3] <= V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V2, V1));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4InBounds
(
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) &&
        (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // All in bounds?
    return ((_mm_movemask_ps(vTemp1)==0x0f) != 0);
#else
    return XMComparisonAllInBounds(XMVector4InBoundsR(V, Bounds));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4InBoundsR
(
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;
    if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) &&
        (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6BOUNDS;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // All in bounds?
    return (_mm_movemask_ps(vTemp1)==0x0f) ? XM_CRMASK_CR6BOUNDS : 0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4IsNaN
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (XMISNAN(V.vector4_f32[0]) ||
            XMISNAN(V.vector4_f32[1]) ||
            XMISNAN(V.vector4_f32[2]) ||
            XMISNAN(V.vector4_f32[3]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Test against itself. NaN is always not equal
    XMVECTOR vTempNan = _mm_cmpneq_ps(V,V);
    // If any are NaN, the mask is non-zero
    return (_mm_movemask_ps(vTempNan)!=0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4IsInfinite
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    return (XMISINF(V.vector4_f32[0]) ||
            XMISINF(V.vector4_f32[1]) ||
            XMISINF(V.vector4_f32[2]) ||
            XMISINF(V.vector4_f32[3]));

#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bit
    XMVECTOR vTemp = _mm_and_ps(V,g_XMAbsMask);
    // Compare to infinity
    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
    // If any are infinity, the signs are true.
    return (_mm_movemask_ps(vTemp) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

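//------------------------------------------------------------------------------
// The two tests above combine naturally into a validity check for values that
// may have picked up a NaN or infinity upstream (e.g. from normalizing a
// zero-length vector). ExampleIsFiniteVector is an illustrative helper.

XMFINLINE BOOL ExampleIsFiniteVector(FXMVECTOR V)
{
    return !XMVector4IsNaN(V) && !XMVector4IsInfinite(V);
}
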
//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Dot
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_f32[0] =
    Result.vector4_f32[1] =
    Result.vector4_f32[2] =
    Result.vector4_f32[3] = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1] + V1.vector4_f32[2] * V2.vector4_f32[2] + V1.vector4_f32[3] * V2.vector4_f32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp2 = V2;
    XMVECTOR vTemp = _mm_mul_ps(V1,vTemp2);
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
    vTemp2 = _mm_add_ps(vTemp2,vTemp);          // Add Z = X+Z; W = Y+W;
    vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0));  // Copy W to the Z position
    vTemp = _mm_add_ps(vTemp,vTemp2);           // Add Z and W together
    return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2));    // Splat Z and return
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Cross
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR V3
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;

    Result.vector4_f32[0] = (((V2.vector4_f32[2]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[2]))*V1.vector4_f32[1])-(((V2.vector4_f32[1]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[1]))*V1.vector4_f32[2])+(((V2.vector4_f32[1]*V3.vector4_f32[2])-(V2.vector4_f32[2]*V3.vector4_f32[1]))*V1.vector4_f32[3]);
    Result.vector4_f32[1] = (((V2.vector4_f32[3]*V3.vector4_f32[2])-(V2.vector4_f32[2]*V3.vector4_f32[3]))*V1.vector4_f32[0])-(((V2.vector4_f32[3]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[3]))*V1.vector4_f32[2])+(((V2.vector4_f32[2]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[2]))*V1.vector4_f32[3]);
    Result.vector4_f32[2] = (((V2.vector4_f32[1]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[1]))*V1.vector4_f32[0])-(((V2.vector4_f32[0]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[0]))*V1.vector4_f32[1])+(((V2.vector4_f32[0]*V3.vector4_f32[1])-(V2.vector4_f32[1]*V3.vector4_f32[0]))*V1.vector4_f32[3]);
    Result.vector4_f32[3] = (((V2.vector4_f32[2]*V3.vector4_f32[1])-(V2.vector4_f32[1]*V3.vector4_f32[2]))*V1.vector4_f32[0])-(((V2.vector4_f32[2]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[2]))*V1.vector4_f32[1])+(((V2.vector4_f32[1]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[1]))*V1.vector4_f32[2]);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // V2zwyz * V3wzwy
    XMVECTOR vResult = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(2,1,3,2));
    XMVECTOR vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(1,3,2,3));
    vResult = _mm_mul_ps(vResult,vTemp3);
    // - V2wzwy * V3zwyz
    XMVECTOR vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(1,3,2,3));
    vTemp3 = _mm_shuffle_ps(vTemp3,vTemp3,_MM_SHUFFLE(1,3,0,1));
    vTemp2 = _mm_mul_ps(vTemp2,vTemp3);
    vResult = _mm_sub_ps(vResult,vTemp2);
    // term1 * V1yxxx
    XMVECTOR vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(0,0,0,1));
    vResult = _mm_mul_ps(vResult,vTemp1);

    // V2ywxz * V3wxwx
    vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(2,0,3,1));
    vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(0,3,0,3));
    vTemp3 = _mm_mul_ps(vTemp3,vTemp2);
    // - V2wxwx * V3ywxz
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(2,1,2,1));
    vTemp1 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(2,0,3,1));
    vTemp2 = _mm_mul_ps(vTemp2,vTemp1);
    vTemp3 = _mm_sub_ps(vTemp3,vTemp2);
    // vResult - temp * V1zzyy
    vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(1,1,2,2));
    vTemp1 = _mm_mul_ps(vTemp1,vTemp3);
    vResult = _mm_sub_ps(vResult,vTemp1);

    // V2yzxy * V3zxyx
    vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(1,0,2,1));
    vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(0,1,0,2));
    vTemp3 = _mm_mul_ps(vTemp3,vTemp2);
    // - V2zxyx * V3yzxy
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(2,0,2,1));
    vTemp1 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(1,0,2,1));
    vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
    vTemp3 = _mm_sub_ps(vTemp3,vTemp1);
    // vResult + term * V1wwwz
    vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(2,3,3,3));
    vTemp3 = _mm_mul_ps(vTemp3,vTemp1);
    vResult = _mm_add_ps(vResult,vTemp3);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

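//------------------------------------------------------------------------------
// The 4D "cross product" above yields a vector orthogonal to all three
// operands, so each dot product against an input should be ~0 up to rounding.
// ExampleCrossIsOrthogonal is an illustrative check (the tolerance assumes
// roughly unit-scale inputs), not a library function.

XMFINLINE VOID ExampleCrossIsOrthogonal(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR V3)
{
    XMVECTOR C = XMVector4Cross(V1, V2, V3);
    XMASSERT(fabsf(XMVectorGetX(XMVector4Dot(C, V1))) < 1.0e-3f);
    XMASSERT(fabsf(XMVectorGetX(XMVector4Dot(C, V2))) < 1.0e-3f);
    XMASSERT(fabsf(XMVectorGetX(XMVector4Dot(C, V3))) < 1.0e-3f);
}
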
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4LengthSq
(
    FXMVECTOR V
)
{
    return XMVector4Dot(V, V);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4ReciprocalLengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector4LengthSq(V);
    Result = XMVectorReciprocalSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Get the reciprocal
    vLengthSq = _mm_rsqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4ReciprocalLength
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector4LengthSq(V);
    Result = XMVectorReciprocalSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Get the reciprocal
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    // Accurate!
    vLengthSq = _mm_div_ps(g_XMOne,vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4LengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector4LengthSq(V);
    Result = XMVectorSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Prepare for the division
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Length
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector4LengthSq(V);
    Result = XMVectorSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Prepare for the division
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// XMVector4NormalizeEst uses a reciprocal estimate and
// returns QNaN on zero and infinite vectors.

XMFINLINE XMVECTOR XMVector4NormalizeEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result = XMVector4ReciprocalLength(V);
    Result = XMVectorMultiply(V, Result);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Get the reciprocal
    XMVECTOR vResult = _mm_rsqrt_ps(vLengthSq);
    // Reciprocal mul to perform the normalization
    vResult = _mm_mul_ps(vResult,V);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Normalize
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fLength;
    XMVECTOR vResult;

    vResult = XMVector4Length( V );
    fLength = vResult.vector4_f32[0];

    // Prevent divide by zero
    if (fLength > 0) {
        fLength = 1.0f/fLength;
    }

    vResult.vector4_f32[0] = V.vector4_f32[0]*fLength;
    vResult.vector4_f32[1] = V.vector4_f32[1]*fLength;
    vResult.vector4_f32[2] = V.vector4_f32[2]*fLength;
    vResult.vector4_f32[3] = V.vector4_f32[3]*fLength;
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Prepare for the division
    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
    // Create zero with a single instruction
    XMVECTOR vZeroMask = _mm_setzero_ps();
    // Test for a divide by zero (Must be FP to detect -0.0)
    vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
    // Failsafe on zero (Or epsilon) length planes
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(V,vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vZeroMask);
    // Select qnan or result based on infinite length
    XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
    XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
    vResult = _mm_or_ps(vTemp1,vTemp2);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

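//------------------------------------------------------------------------------
// Per the code above, XMVector4Normalize maps a zero vector to zero and an
// infinite-length vector to QNaN, while XMVector4NormalizeEst returns QNaN in
// both cases. ExampleSafeNormalize4 (illustrative only) falls back to zero:

XMFINLINE XMVECTOR ExampleSafeNormalize4(FXMVECTOR V)
{
    XMVECTOR N = XMVector4Normalize(V);
    return XMVector4IsNaN(N) ? XMVectorZero() : N;
}
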
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4ClampLength
(
    FXMVECTOR V,
    FLOAT LengthMin,
    FLOAT LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampMax;
    XMVECTOR ClampMin;

    ClampMax = XMVectorReplicate(LengthMax);
    ClampMin = XMVectorReplicate(LengthMin);

    return XMVector4ClampLengthV(V, ClampMin, ClampMax);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
    XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
    return XMVector4ClampLengthV(V, ClampMin, ClampMax);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4ClampLengthV
(
    FXMVECTOR V,
    FXMVECTOR LengthMin,
    FXMVECTOR LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[2] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[3] == LengthMin.vector4_f32[0]));
    XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[2] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[3] == LengthMax.vector4_f32[0]));
    XMASSERT(XMVector4GreaterOrEqual(LengthMin, XMVectorZero()));
    XMASSERT(XMVector4GreaterOrEqual(LengthMax, XMVectorZero()));
    XMASSERT(XMVector4GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector4LengthSq(V);

    Zero = XMVectorZero();

    RcpLength = XMVectorReciprocalSqrt(LengthSq);

    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
    ZeroLength = XMVectorEqual(LengthSq, Zero);

    Normal = XMVectorMultiply(V, RcpLength);

    Length = XMVectorMultiply(LengthSq, RcpLength);

    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);

    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);

    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);

    Result = XMVectorMultiply(Normal, ClampLength);

    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetW(LengthMin) == XMVectorGetX(LengthMin)));
    XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetW(LengthMax) == XMVectorGetX(LengthMax)));
    XMASSERT(XMVector4GreaterOrEqual(LengthMin, g_XMZero));
    XMASSERT(XMVector4GreaterOrEqual(LengthMax, g_XMZero));
    XMASSERT(XMVector4GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector4LengthSq(V);
    Zero = XMVectorZero();
    RcpLength = XMVectorReciprocalSqrt(LengthSq);
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
    ZeroLength = XMVectorEqual(LengthSq, Zero);
    Normal = _mm_mul_ps(V, RcpLength);
    Length = _mm_mul_ps(LengthSq, RcpLength);
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);
    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);
    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
    Result = _mm_mul_ps(Normal, ClampLength);
    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax,ControlMin);
    Result = XMVectorSelect(Result,V,Control);
    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

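//------------------------------------------------------------------------------
// Length clamping keeps a vector's direction while bounding its magnitude, and
// vectors already inside [LengthMin, LengthMax] pass through unchanged (see the
// final select above). ExampleClampSpeed is an illustrative helper.

XMFINLINE XMVECTOR ExampleClampSpeed(FXMVECTOR Velocity, FLOAT MinSpeed, FLOAT MaxSpeed)
{
    return XMVector4ClampLength(Velocity, MinSpeed, MaxSpeed);
}
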
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Reflect
(
    FXMVECTOR Incident,
    FXMVECTOR Normal
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    Result = XMVector4Dot(Incident, Normal);
    Result = XMVectorAdd(Result, Result);
    Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    XMVECTOR Result = XMVector4Dot(Incident,Normal);
    Result = _mm_add_ps(Result,Result);
    Result = _mm_mul_ps(Result,Normal);
    Result = _mm_sub_ps(Incident,Result);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Refract
(
    FXMVECTOR Incident,
    FXMVECTOR Normal,
    FLOAT RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Index;
    Index = XMVectorReplicate(RefractionIndex);
    return XMVector4RefractV(Incident, Normal, Index);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Index = _mm_set_ps1(RefractionIndex);
    return XMVector4RefractV(Incident,Normal,Index);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

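//------------------------------------------------------------------------------
// RefractionIndex is the ratio n1/n2 across the surface. Past the critical
// angle the function above detects total internal reflection and returns the
// zero vector, so callers can fall back to the mirror direction. The helper
// and the 1.5f glass-to-air ratio are illustrative assumptions.

XMFINLINE XMVECTOR ExampleRefractOrReflect(FXMVECTOR Incident, FXMVECTOR Normal)
{
    XMVECTOR R = XMVector4Refract(Incident, Normal, 1.5f);
    if (XMVector4Equal(R, XMVectorZero()))
    {
        R = XMVector4Reflect(Incident, Normal); // total internal reflection
    }
    return R;
}
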
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4RefractV
(
    FXMVECTOR Incident,
    FXMVECTOR Normal,
    FXMVECTOR RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR IDotN;
    XMVECTOR R;
    CONST XMVECTOR Zero = XMVectorZero();

    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))

    IDotN = XMVector4Dot(Incident, Normal);

    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v);
    R = XMVectorMultiply(R, RefractionIndex);
    R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v);

    if (XMVector4LessOrEqual(R, Zero))
    {
        // Total internal reflection
        return Zero;
    }
    else
    {
        XMVECTOR Result;

        // R = RefractionIndex * IDotN + sqrt(R)
        R = XMVectorSqrt(R);
        R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R);

        // Result = RefractionIndex * Incident - Normal * R
        Result = XMVectorMultiply(RefractionIndex, Incident);
        Result = XMVectorNegativeMultiplySubtract(Normal, R, Result);

        return Result;
    }

#elif defined(_XM_SSE_INTRINSICS_)
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))

    XMVECTOR IDotN = XMVector4Dot(Incident,Normal);

    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    XMVECTOR R = _mm_mul_ps(IDotN,IDotN);
    R = _mm_sub_ps(g_XMOne,R);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_sub_ps(g_XMOne,R);

    XMVECTOR vResult = _mm_cmple_ps(R,g_XMZero);
    if (_mm_movemask_ps(vResult)==0x0f)
    {
        // Total internal reflection
        vResult = g_XMZero;
    }
    else
    {
        // R = RefractionIndex * IDotN + sqrt(R)
        R = _mm_sqrt_ps(R);
        vResult = _mm_mul_ps(RefractionIndex, IDotN);
        R = _mm_add_ps(R,vResult);
        // Result = RefractionIndex * Incident - Normal * R
        vResult = _mm_mul_ps(RefractionIndex, Incident);
        R = _mm_mul_ps(R,Normal);
        vResult = _mm_sub_ps(vResult,R);
    }
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Orthogonal
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result.vector4_f32[0] = V.vector4_f32[2];
    Result.vector4_f32[1] = V.vector4_f32[3];
    Result.vector4_f32[2] = -V.vector4_f32[0];
    Result.vector4_f32[3] = -V.vector4_f32[1];
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 FlipZW = {1.0f,1.0f,-1.0f,-1.0f};
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,0,3,2));
    vResult = _mm_mul_ps(vResult,FlipZW);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4AngleBetweenNormalsEst
(
    FXMVECTOR N1,
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    Result = XMVector4Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACosEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector4Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACosEst(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4AngleBetweenNormals
(
    FXMVECTOR N1,
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    Result = XMVector4Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACos(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector4Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACos(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4AngleBetweenVectors
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    L1 = XMVector4ReciprocalLength(V1);
    L2 = XMVector4ReciprocalLength(V2);

    Dot = XMVector4Dot(V1, V2);

    L1 = XMVectorMultiply(L1, L2);

    CosAngle = XMVectorMultiply(Dot, L1);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);

    Result = XMVectorACos(CosAngle);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR Result;

    L1 = XMVector4ReciprocalLength(V1);
    L2 = XMVector4ReciprocalLength(V2);
    Dot = XMVector4Dot(V1, V2);
    L1 = _mm_mul_ps(L1,L2);
    CosAngle = _mm_mul_ps(Dot,L1);
    CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne, g_XMOne);
    Result = XMVectorACos(CosAngle);
    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

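//------------------------------------------------------------------------------
// The angle comes back replicated into all four components, in radians.
// ExampleAngleDegrees (illustrative, not a library function) extracts and
// converts it:

XMFINLINE FLOAT ExampleAngleDegrees(FXMVECTOR V1, FXMVECTOR V2)
{
    return XMConvertToDegrees(XMVectorGetX(XMVector4AngleBetweenVectors(V1, V2)));
}
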
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Transform
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fX = (M.m[0][0]*V.vector4_f32[0])+(M.m[1][0]*V.vector4_f32[1])+(M.m[2][0]*V.vector4_f32[2])+(M.m[3][0]*V.vector4_f32[3]);
    FLOAT fY = (M.m[0][1]*V.vector4_f32[0])+(M.m[1][1]*V.vector4_f32[1])+(M.m[2][1]*V.vector4_f32[2])+(M.m[3][1]*V.vector4_f32[3]);
    FLOAT fZ = (M.m[0][2]*V.vector4_f32[0])+(M.m[1][2]*V.vector4_f32[1])+(M.m[2][2]*V.vector4_f32[2])+(M.m[3][2]*V.vector4_f32[3]);
    FLOAT fW = (M.m[0][3]*V.vector4_f32[0])+(M.m[1][3]*V.vector4_f32[1])+(M.m[2][3]*V.vector4_f32[2])+(M.m[3][3]*V.vector4_f32[3]);
    XMVECTOR vResult = { fX, fY, fZ, fW };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // Splat x,y,z and w
    XMVECTOR vTempX = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    XMVECTOR vTempY = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    XMVECTOR vTempZ = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    XMVECTOR vTempW = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    // Mul by the matrix
    vTempX = _mm_mul_ps(vTempX,M.r[0]);
    vTempY = _mm_mul_ps(vTempY,M.r[1]);
    vTempZ = _mm_mul_ps(vTempZ,M.r[2]);
    vTempW = _mm_mul_ps(vTempW,M.r[3]);
    // Add them all together
    vTempX = _mm_add_ps(vTempX,vTempY);
    vTempZ = _mm_add_ps(vTempZ,vTempW);
    vTempX = _mm_add_ps(vTempX,vTempZ);
    return vTempX;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMINLINE XMFLOAT4* XMVector4TransformStream
(
    XMFLOAT4* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT4* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR W;
    XMVECTOR Result;
    UINT i;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat4((XMFLOAT4*)pInputVector);
        W = XMVectorSplatW(V);
        Z = XMVectorSplatZ(V);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//      W = XMVectorReplicate(((XMFLOAT4*)pInputVector)->w);
//      Z = XMVectorReplicate(((XMFLOAT4*)pInputVector)->z);
//      Y = XMVectorReplicate(((XMFLOAT4*)pInputVector)->y);
//      X = XMVectorReplicate(((XMFLOAT4*)pInputVector)->x);

        Result = XMVectorMultiply(W, M.r[3]);
        Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    UINT i;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    const BYTE* pInputVector = reinterpret_cast<const BYTE *>(pInputStream);
    BYTE* pOutputVector = reinterpret_cast<BYTE *>(pOutputStream);
    for (i = 0; i < VectorCount; i++)
    {
        // Fetch the row and splat it
        XMVECTOR vTempx = _mm_loadu_ps(reinterpret_cast<const float *>(pInputVector));
        XMVECTOR vTempy = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(1,1,1,1));
        XMVECTOR vTempz = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(2,2,2,2));
        XMVECTOR vTempw = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(3,3,3,3));
        vTempx = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(0,0,0,0));
        vTempx = _mm_mul_ps(vTempx,M.r[0]);
        vTempy = _mm_mul_ps(vTempy,M.r[1]);
        vTempz = _mm_mul_ps(vTempz,M.r[2]);
        vTempw = _mm_mul_ps(vTempw,M.r[3]);
        vTempx = _mm_add_ps(vTempx,vTempy);
        vTempw = _mm_add_ps(vTempw,vTempz);
        vTempw = _mm_add_ps(vTempw,vTempx);
        // Store the transformed vector
        _mm_storeu_ps(reinterpret_cast<float *>(pOutputVector),vTempw);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

#ifdef __cplusplus

/****************************************************************************
 *
 * XMVECTOR operators
 *
 ****************************************************************************/

#ifndef XM_NO_OPERATOR_OVERLOADS

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator+ (FXMVECTOR V)
{
    return V;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator- (FXMVECTOR V)
{
    return XMVectorNegate(V);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR& operator+=
(
    XMVECTOR& V1,
    FXMVECTOR V2
)
{
    V1 = XMVectorAdd(V1, V2);
    return V1;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR& operator-=
(
    XMVECTOR& V1,
    FXMVECTOR V2
)
{
    V1 = XMVectorSubtract(V1, V2);
    return V1;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR& operator*=
(
    XMVECTOR& V1,
    FXMVECTOR V2
)
{
    V1 = XMVectorMultiply(V1, V2);
    return V1;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR& operator/=
(
    XMVECTOR& V1,
    FXMVECTOR V2
)
{
    V1 = XMVectorDivide(V1,V2);
    return V1;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR& operator*=
(
    XMVECTOR& V,
    CONST FLOAT S
)
{
    V = XMVectorScale(V, S);
    return V;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR& operator/=
(
    XMVECTOR& V,
    CONST FLOAT S
)
{
    V = XMVectorScale(V, 1.0f / S);
    return V;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator+
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
    return XMVectorAdd(V1, V2);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator-
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
    return XMVectorSubtract(V1, V2);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator*
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
    return XMVectorMultiply(V1, V2);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator/
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
    return XMVectorDivide(V1,V2);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator*
(
    FXMVECTOR V,
    CONST FLOAT S
)
{
    return XMVectorScale(V, S);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator/
(
    FXMVECTOR V,
    CONST FLOAT S
)
{
    return XMVectorScale(V, 1.0f / S);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator*
(
    FLOAT S,
    FXMVECTOR V
)
{
    return XMVectorScale(V, S);
}

#endif // !XM_NO_OPERATOR_OVERLOADS

****************************************************************************/ 11054 11055//------------------------------------------------------------------------------ 11056 11057XMFINLINE _XMFLOAT2::_XMFLOAT2 11058( 11059 CONST FLOAT* pArray 11060) 11061{ 11062 x = pArray[0]; 11063 y = pArray[1]; 11064} 11065 11066//------------------------------------------------------------------------------ 11067 11068XMFINLINE _XMFLOAT2& _XMFLOAT2::operator= 11069( 11070 CONST _XMFLOAT2& Float2 11071) 11072{ 11073 x = Float2.x; 11074 y = Float2.y; 11075 return *this; 11076} 11077 11078//------------------------------------------------------------------------------ 11079 11080XMFINLINE XMFLOAT2A& XMFLOAT2A::operator= 11081( 11082 CONST XMFLOAT2A& Float2 11083) 11084{ 11085 x = Float2.x; 11086 y = Float2.y; 11087 return *this; 11088} 11089 11090/**************************************************************************** 11091 * 11092 * XMHALF2 operators 11093 * 11094 ****************************************************************************/ 11095 11096//------------------------------------------------------------------------------ 11097 11098XMFINLINE _XMHALF2::_XMHALF2 11099( 11100 CONST HALF* pArray 11101) 11102{ 11103 x = pArray[0]; 11104 y = pArray[1]; 11105} 11106 11107//------------------------------------------------------------------------------ 11108 11109XMFINLINE _XMHALF2::_XMHALF2 11110( 11111 FLOAT _x, 11112 FLOAT _y 11113) 11114{ 11115 x = XMConvertFloatToHalf(_x); 11116 y = XMConvertFloatToHalf(_y); 11117} 11118 11119//------------------------------------------------------------------------------ 11120 11121XMFINLINE _XMHALF2::_XMHALF2 11122( 11123 CONST FLOAT* pArray 11124) 11125{ 11126 x = XMConvertFloatToHalf(pArray[0]); 11127 y = XMConvertFloatToHalf(pArray[1]); 11128} 11129 11130//------------------------------------------------------------------------------ 11131 11132XMFINLINE _XMHALF2& _XMHALF2::operator= 11133( 11134 CONST _XMHALF2& Half2 11135) 11136{ 11137 x = Half2.x; 11138 y = Half2.y; 11139 return *this; 11140} 11141 11142/**************************************************************************** 11143 * 11144 * XMSHORTN2 operators 11145 * 11146 ****************************************************************************/ 11147 11148//------------------------------------------------------------------------------ 11149 11150XMFINLINE _XMSHORTN2::_XMSHORTN2 11151( 11152 CONST SHORT* pArray 11153) 11154{ 11155 x = pArray[0]; 11156 y = pArray[1]; 11157} 11158 11159//------------------------------------------------------------------------------ 11160 11161XMFINLINE _XMSHORTN2::_XMSHORTN2 11162( 11163 FLOAT _x, 11164 FLOAT _y 11165) 11166{ 11167 XMStoreShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); 11168} 11169 11170//------------------------------------------------------------------------------ 11171 11172XMFINLINE _XMSHORTN2::_XMSHORTN2 11173( 11174 CONST FLOAT* pArray 11175) 11176{ 11177 XMStoreShortN2(this, XMLoadFloat2((XMFLOAT2*)pArray)); 11178} 11179 11180//------------------------------------------------------------------------------ 11181 11182XMFINLINE _XMSHORTN2& _XMSHORTN2::operator= 11183( 11184 CONST _XMSHORTN2& ShortN2 11185) 11186{ 11187 x = ShortN2.x; 11188 y = ShortN2.y; 11189 return *this; 11190} 11191 11192/**************************************************************************** 11193 * 11194 * XMSHORT2 operators 11195 * 11196 ****************************************************************************/ 11197 
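//------------------------------------------------------------------------------
// Note (illustrative, assumed semantics of the XMStore* routines): XMSHORTN2
// above treats its float inputs as normalized values in [-1,1] and scales them
// to the full signed 16-bit range, while XMSHORT2 below rounds and clamps the
// raw values. For example:
//
//     _XMSHORTN2 n(1.0f, -1.0f);   // stores approximately 32767, -32767
//     _XMSHORT2  s(1.0f, -1.0f);   // stores 1, -1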
11198//------------------------------------------------------------------------------ 11199 11200XMFINLINE _XMSHORT2::_XMSHORT2 11201( 11202 CONST SHORT* pArray 11203) 11204{ 11205 x = pArray[0]; 11206 y = pArray[1]; 11207} 11208 11209//------------------------------------------------------------------------------ 11210 11211XMFINLINE _XMSHORT2::_XMSHORT2 11212( 11213 FLOAT _x, 11214 FLOAT _y 11215) 11216{ 11217 XMStoreShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); 11218} 11219 11220//------------------------------------------------------------------------------ 11221 11222XMFINLINE _XMSHORT2::_XMSHORT2 11223( 11224 CONST FLOAT* pArray 11225) 11226{ 11227 XMStoreShort2(this, XMLoadFloat2((XMFLOAT2*)pArray)); 11228} 11229 11230//------------------------------------------------------------------------------ 11231 11232XMFINLINE _XMSHORT2& _XMSHORT2::operator= 11233( 11234 CONST _XMSHORT2& Short2 11235) 11236{ 11237 x = Short2.x; 11238 y = Short2.y; 11239 return *this; 11240} 11241 11242/**************************************************************************** 11243 * 11244 * XMUSHORTN2 operators 11245 * 11246 ****************************************************************************/ 11247 11248//------------------------------------------------------------------------------ 11249 11250XMFINLINE _XMUSHORTN2::_XMUSHORTN2 11251( 11252 CONST USHORT* pArray 11253) 11254{ 11255 x = pArray[0]; 11256 y = pArray[1]; 11257} 11258 11259//------------------------------------------------------------------------------ 11260 11261XMFINLINE _XMUSHORTN2::_XMUSHORTN2 11262( 11263 FLOAT _x, 11264 FLOAT _y 11265) 11266{ 11267 XMStoreUShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); 11268} 11269 11270//------------------------------------------------------------------------------ 11271 11272XMFINLINE _XMUSHORTN2::_XMUSHORTN2 11273( 11274 CONST FLOAT* pArray 11275) 11276{ 11277 XMStoreUShortN2(this, XMLoadFloat2((XMFLOAT2*)pArray)); 11278} 11279 11280//------------------------------------------------------------------------------ 11281 11282XMFINLINE _XMUSHORTN2& _XMUSHORTN2::operator= 11283( 11284 CONST _XMUSHORTN2& UShortN2 11285) 11286{ 11287 x = UShortN2.x; 11288 y = UShortN2.y; 11289 return *this; 11290} 11291 11292/**************************************************************************** 11293 * 11294 * XMUSHORT2 operators 11295 * 11296 ****************************************************************************/ 11297 11298//------------------------------------------------------------------------------ 11299 11300XMFINLINE _XMUSHORT2::_XMUSHORT2 11301( 11302 CONST USHORT* pArray 11303) 11304{ 11305 x = pArray[0]; 11306 y = pArray[1]; 11307} 11308 11309//------------------------------------------------------------------------------ 11310 11311XMFINLINE _XMUSHORT2::_XMUSHORT2 11312( 11313 FLOAT _x, 11314 FLOAT _y 11315) 11316{ 11317 XMStoreUShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); 11318} 11319 11320//------------------------------------------------------------------------------ 11321 11322XMFINLINE _XMUSHORT2::_XMUSHORT2 11323( 11324 CONST FLOAT* pArray 11325) 11326{ 11327 XMStoreUShort2(this, XMLoadFloat2((XMFLOAT2*)pArray)); 11328} 11329 11330//------------------------------------------------------------------------------ 11331 11332XMFINLINE _XMUSHORT2& _XMUSHORT2::operator= 11333( 11334 CONST _XMUSHORT2& UShort2 11335) 11336{ 11337 x = UShort2.x; 11338 y = UShort2.y; 11339 return *this; 11340} 11341 11342/**************************************************************************** 11343 * 
11344 * XMFLOAT3 operators 11345 * 11346 ****************************************************************************/ 11347 11348//------------------------------------------------------------------------------ 11349 11350XMFINLINE _XMFLOAT3::_XMFLOAT3 11351( 11352 CONST FLOAT* pArray 11353) 11354{ 11355 x = pArray[0]; 11356 y = pArray[1]; 11357 z = pArray[2]; 11358} 11359 11360//------------------------------------------------------------------------------ 11361 11362XMFINLINE _XMFLOAT3& _XMFLOAT3::operator= 11363( 11364 CONST _XMFLOAT3& Float3 11365) 11366{ 11367 x = Float3.x; 11368 y = Float3.y; 11369 z = Float3.z; 11370 return *this; 11371} 11372 11373//------------------------------------------------------------------------------ 11374 11375XMFINLINE XMFLOAT3A& XMFLOAT3A::operator= 11376( 11377 CONST XMFLOAT3A& Float3 11378) 11379{ 11380 x = Float3.x; 11381 y = Float3.y; 11382 z = Float3.z; 11383 return *this; 11384} 11385 11386/**************************************************************************** 11387 * 11388 * XMHENDN3 operators 11389 * 11390 ****************************************************************************/ 11391 11392//------------------------------------------------------------------------------ 11393 11394XMFINLINE _XMHENDN3::_XMHENDN3 11395( 11396 FLOAT _x, 11397 FLOAT _y, 11398 FLOAT _z 11399) 11400{ 11401 XMStoreHenDN3(this, XMVectorSet(_x, _y, _z, 0.0f)); 11402} 11403 11404//------------------------------------------------------------------------------ 11405 11406XMFINLINE _XMHENDN3::_XMHENDN3 11407( 11408 CONST FLOAT* pArray 11409) 11410{ 11411 XMStoreHenDN3(this, XMLoadFloat3((XMFLOAT3*)pArray)); 11412} 11413 11414//------------------------------------------------------------------------------ 11415 11416XMFINLINE _XMHENDN3& _XMHENDN3::operator= 11417( 11418 CONST _XMHENDN3& HenDN3 11419) 11420{ 11421 v = HenDN3.v; 11422 return *this; 11423} 11424 11425//------------------------------------------------------------------------------ 11426 11427XMFINLINE _XMHENDN3& _XMHENDN3::operator= 11428( 11429 CONST UINT Packed 11430) 11431{ 11432 v = Packed; 11433 return *this; 11434} 11435 11436/**************************************************************************** 11437 * 11438 * XMHEND3 operators 11439 * 11440 ****************************************************************************/ 11441 11442//------------------------------------------------------------------------------ 11443 11444XMFINLINE _XMHEND3::_XMHEND3 11445( 11446 FLOAT _x, 11447 FLOAT _y, 11448 FLOAT _z 11449) 11450{ 11451 XMStoreHenD3(this, XMVectorSet(_x, _y, _z, 0.0f)); 11452} 11453 11454//------------------------------------------------------------------------------ 11455 11456XMFINLINE _XMHEND3::_XMHEND3 11457( 11458 CONST FLOAT* pArray 11459) 11460{ 11461 XMStoreHenD3(this, XMLoadFloat3((XMFLOAT3*)pArray)); 11462} 11463 11464//------------------------------------------------------------------------------ 11465 11466XMFINLINE _XMHEND3& _XMHEND3::operator= 11467( 11468 CONST _XMHEND3& HenD3 11469) 11470{ 11471 v = HenD3.v; 11472 return *this; 11473} 11474 11475//------------------------------------------------------------------------------ 11476 11477XMFINLINE _XMHEND3& _XMHEND3::operator= 11478( 11479 CONST UINT Packed 11480) 11481{ 11482 v = Packed; 11483 return *this; 11484} 11485 11486/**************************************************************************** 11487 * 11488 * XMUHENDN3 operators 11489 * 11490 
****************************************************************************/ 11491 11492//------------------------------------------------------------------------------ 11493 11494XMFINLINE _XMUHENDN3::_XMUHENDN3 11495( 11496 FLOAT _x, 11497 FLOAT _y, 11498 FLOAT _z 11499) 11500{ 11501 XMStoreUHenDN3(this, XMVectorSet(_x, _y, _z, 0.0f)); 11502} 11503 11504//------------------------------------------------------------------------------ 11505 11506XMFINLINE _XMUHENDN3::_XMUHENDN3 11507( 11508 CONST FLOAT* pArray 11509) 11510{ 11511 XMStoreUHenDN3(this, XMLoadFloat3((XMFLOAT3*)pArray)); 11512} 11513 11514//------------------------------------------------------------------------------ 11515 11516XMFINLINE _XMUHENDN3& _XMUHENDN3::operator= 11517( 11518 CONST _XMUHENDN3& UHenDN3 11519) 11520{ 11521 v = UHenDN3.v; 11522 return *this; 11523} 11524 11525//------------------------------------------------------------------------------ 11526 11527XMFINLINE _XMUHENDN3& _XMUHENDN3::operator= 11528( 11529 CONST UINT Packed 11530) 11531{ 11532 v = Packed; 11533 return *this; 11534} 11535 11536/**************************************************************************** 11537 * 11538 * XMUHEND3 operators 11539 * 11540 ****************************************************************************/ 11541 11542//------------------------------------------------------------------------------ 11543 11544XMFINLINE _XMUHEND3::_XMUHEND3 11545( 11546 FLOAT _x, 11547 FLOAT _y, 11548 FLOAT _z 11549) 11550{ 11551 XMStoreUHenD3(this, XMVectorSet(_x, _y, _z, 0.0f)); 11552} 11553 11554//------------------------------------------------------------------------------ 11555 11556XMFINLINE _XMUHEND3::_XMUHEND3 11557( 11558 CONST FLOAT* pArray 11559) 11560{ 11561 XMStoreUHenD3(this, XMLoadFloat3((XMFLOAT3*)pArray)); 11562} 11563 11564//------------------------------------------------------------------------------ 11565 11566XMFINLINE _XMUHEND3& _XMUHEND3::operator= 11567( 11568 CONST _XMUHEND3& UHenD3 11569) 11570{ 11571 v = UHenD3.v; 11572 return *this; 11573} 11574 11575//------------------------------------------------------------------------------ 11576 11577XMFINLINE _XMUHEND3& _XMUHEND3::operator= 11578( 11579 CONST UINT Packed 11580) 11581{ 11582 v = Packed; 11583 return *this; 11584} 11585 11586/**************************************************************************** 11587 * 11588 * XMDHENN3 operators 11589 * 11590 ****************************************************************************/ 11591 11592//------------------------------------------------------------------------------ 11593 11594XMFINLINE _XMDHENN3::_XMDHENN3 11595( 11596 FLOAT _x, 11597 FLOAT _y, 11598 FLOAT _z 11599) 11600{ 11601 XMStoreDHenN3(this, XMVectorSet(_x, _y, _z, 0.0f)); 11602} 11603 11604//------------------------------------------------------------------------------ 11605 11606XMFINLINE _XMDHENN3::_XMDHENN3 11607( 11608 CONST FLOAT* pArray 11609) 11610{ 11611 XMStoreDHenN3(this, XMLoadFloat3((XMFLOAT3*)pArray)); 11612} 11613 11614//------------------------------------------------------------------------------ 11615 11616XMFINLINE _XMDHENN3& _XMDHENN3::operator= 11617( 11618 CONST _XMDHENN3& DHenN3 11619) 11620{ 11621 v = DHenN3.v; 11622 return *this; 11623} 11624 11625//------------------------------------------------------------------------------ 11626 11627XMFINLINE _XMDHENN3& _XMDHENN3::operator= 11628( 11629 CONST UINT Packed 11630) 11631{ 11632 v = Packed; 11633 return *this; 11634} 11635 
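//------------------------------------------------------------------------------
// Example (illustrative sketch): the Hen/DHen packed 3-vector types all follow
// the same pattern -- construct from floats (routed through the matching
// XMStore* function) or assign a raw packed UINT directly. The values below
// are hypothetical.
//
//     _XMDHENN3 d(0.5f, -0.25f, 0.0f);
//     UINT bits = d.v;   // raw 32-bit packed representation
//     d = bits;          // reassign from the packed form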
11636/**************************************************************************** 11637 * 11638 * XMDHEN3 operators 11639 * 11640 ****************************************************************************/ 11641 11642//------------------------------------------------------------------------------ 11643 11644XMFINLINE _XMDHEN3::_XMDHEN3 11645( 11646 FLOAT _x, 11647 FLOAT _y, 11648 FLOAT _z 11649) 11650{ 11651 XMStoreDHen3(this, XMVectorSet(_x, _y, _z, 0.0f)); 11652} 11653 11654//------------------------------------------------------------------------------ 11655 11656XMFINLINE _XMDHEN3::_XMDHEN3 11657( 11658 CONST FLOAT* pArray 11659) 11660{ 11661 XMStoreDHen3(this, XMLoadFloat3((XMFLOAT3*)pArray)); 11662} 11663 11664//------------------------------------------------------------------------------ 11665 11666XMFINLINE _XMDHEN3& _XMDHEN3::operator= 11667( 11668 CONST _XMDHEN3& DHen3 11669) 11670{ 11671 v = DHen3.v; 11672 return *this; 11673} 11674 11675//------------------------------------------------------------------------------ 11676 11677XMFINLINE _XMDHEN3& _XMDHEN3::operator= 11678( 11679 CONST UINT Packed 11680) 11681{ 11682 v = Packed; 11683 return *this; 11684} 11685 11686/**************************************************************************** 11687 * 11688 * XMUDHENN3 operators 11689 * 11690 ****************************************************************************/ 11691 11692//------------------------------------------------------------------------------ 11693 11694XMFINLINE _XMUDHENN3::_XMUDHENN3 11695( 11696 FLOAT _x, 11697 FLOAT _y, 11698 FLOAT _z 11699) 11700{ 11701 XMStoreUDHenN3(this, XMVectorSet(_x, _y, _z, 0.0f)); 11702} 11703 11704//------------------------------------------------------------------------------ 11705 11706XMFINLINE _XMUDHENN3::_XMUDHENN3 11707( 11708 CONST FLOAT* pArray 11709) 11710{ 11711 XMStoreUDHenN3(this, XMLoadFloat3((XMFLOAT3*)pArray)); 11712} 11713 11714//------------------------------------------------------------------------------ 11715 11716XMFINLINE _XMUDHENN3& _XMUDHENN3::operator= 11717( 11718 CONST _XMUDHENN3& UDHenN3 11719) 11720{ 11721 v = UDHenN3.v; 11722 return *this; 11723} 11724 11725//------------------------------------------------------------------------------ 11726 11727XMFINLINE _XMUDHENN3& _XMUDHENN3::operator= 11728( 11729 CONST UINT Packed 11730) 11731{ 11732 v = Packed; 11733 return *this; 11734} 11735 11736/**************************************************************************** 11737 * 11738 * XMUDHEN3 operators 11739 * 11740 ****************************************************************************/ 11741 11742//------------------------------------------------------------------------------ 11743 11744XMFINLINE _XMUDHEN3::_XMUDHEN3 11745( 11746 FLOAT _x, 11747 FLOAT _y, 11748 FLOAT _z 11749) 11750{ 11751 XMStoreUDHen3(this, XMVectorSet(_x, _y, _z, 0.0f)); 11752} 11753 11754//------------------------------------------------------------------------------ 11755 11756XMFINLINE _XMUDHEN3::_XMUDHEN3 11757( 11758 CONST FLOAT* pArray 11759) 11760{ 11761 XMStoreUDHen3(this, XMLoadFloat3((XMFLOAT3*)pArray)); 11762} 11763 11764//------------------------------------------------------------------------------ 11765 11766XMFINLINE _XMUDHEN3& _XMUDHEN3::operator= 11767( 11768 CONST _XMUDHEN3& UDHen3 11769) 11770{ 11771 v = UDHen3.v; 11772 return *this; 11773} 11774 11775//------------------------------------------------------------------------------ 11776 11777XMFINLINE _XMUDHEN3& _XMUDHEN3::operator= 11778( 11779 CONST UINT 
Packed 11780) 11781{ 11782 v = Packed; 11783 return *this; 11784} 11785 11786/**************************************************************************** 11787 * 11788 * XMU565 operators 11789 * 11790 ****************************************************************************/ 11791 11792XMFINLINE _XMU565::_XMU565 11793( 11794 CONST CHAR *pArray 11795) 11796{ 11797 x = pArray[0]; 11798 y = pArray[1]; 11799 z = pArray[2]; 11800} 11801 11802XMFINLINE _XMU565::_XMU565 11803( 11804 FLOAT _x, 11805 FLOAT _y, 11806 FLOAT _z 11807) 11808{ 11809 XMStoreU565(this, XMVectorSet( _x, _y, _z, 0.0f )); 11810} 11811 11812XMFINLINE _XMU565::_XMU565 11813( 11814 CONST FLOAT *pArray 11815) 11816{ 11817 XMStoreU565(this, XMLoadFloat3((XMFLOAT3*)pArray )); 11818} 11819 11820XMFINLINE _XMU565& _XMU565::operator= 11821( 11822 CONST _XMU565& U565 11823) 11824{ 11825 v = U565.v; 11826 return *this; 11827} 11828 11829XMFINLINE _XMU565& _XMU565::operator= 11830( 11831 CONST USHORT Packed 11832) 11833{ 11834 v = Packed; 11835 return *this; 11836} 11837 11838/**************************************************************************** 11839 * 11840 * XMFLOAT3PK operators 11841 * 11842 ****************************************************************************/ 11843 11844XMFINLINE _XMFLOAT3PK::_XMFLOAT3PK 11845( 11846 FLOAT _x, 11847 FLOAT _y, 11848 FLOAT _z 11849) 11850{ 11851 XMStoreFloat3PK(this, XMVectorSet( _x, _y, _z, 0.0f )); 11852} 11853 11854XMFINLINE _XMFLOAT3PK::_XMFLOAT3PK 11855( 11856 CONST FLOAT *pArray 11857) 11858{ 11859 XMStoreFloat3PK(this, XMLoadFloat3((XMFLOAT3*)pArray )); 11860} 11861 11862XMFINLINE _XMFLOAT3PK& _XMFLOAT3PK::operator= 11863( 11864 CONST _XMFLOAT3PK& float3pk 11865) 11866{ 11867 v = float3pk.v; 11868 return *this; 11869} 11870 11871XMFINLINE _XMFLOAT3PK& _XMFLOAT3PK::operator= 11872( 11873 CONST UINT Packed 11874) 11875{ 11876 v = Packed; 11877 return *this; 11878} 11879 11880/**************************************************************************** 11881 * 11882 * XMFLOAT3SE operators 11883 * 11884 ****************************************************************************/ 11885 11886XMFINLINE _XMFLOAT3SE::_XMFLOAT3SE 11887( 11888 FLOAT _x, 11889 FLOAT _y, 11890 FLOAT _z 11891) 11892{ 11893 XMStoreFloat3SE(this, XMVectorSet( _x, _y, _z, 0.0f )); 11894} 11895 11896XMFINLINE _XMFLOAT3SE::_XMFLOAT3SE 11897( 11898 CONST FLOAT *pArray 11899) 11900{ 11901 XMStoreFloat3SE(this, XMLoadFloat3((XMFLOAT3*)pArray )); 11902} 11903 11904XMFINLINE _XMFLOAT3SE& _XMFLOAT3SE::operator= 11905( 11906 CONST _XMFLOAT3SE& float3se 11907) 11908{ 11909 v = float3se.v; 11910 return *this; 11911} 11912 11913XMFINLINE _XMFLOAT3SE& _XMFLOAT3SE::operator= 11914( 11915 CONST UINT Packed 11916) 11917{ 11918 v = Packed; 11919 return *this; 11920} 11921 11922/**************************************************************************** 11923 * 11924 * XMFLOAT4 operators 11925 * 11926 ****************************************************************************/ 11927 11928//------------------------------------------------------------------------------ 11929 11930XMFINLINE _XMFLOAT4::_XMFLOAT4 11931( 11932 CONST FLOAT* pArray 11933) 11934{ 11935 x = pArray[0]; 11936 y = pArray[1]; 11937 z = pArray[2]; 11938 w = pArray[3]; 11939} 11940 11941//------------------------------------------------------------------------------ 11942 11943XMFINLINE _XMFLOAT4& _XMFLOAT4::operator= 11944( 11945 CONST _XMFLOAT4& Float4 11946) 11947{ 11948 x = Float4.x; 11949 y = Float4.y; 11950 z = Float4.z; 11951 w = Float4.w; 11952 
return *this; 11953} 11954 11955//------------------------------------------------------------------------------ 11956 11957XMFINLINE XMFLOAT4A& XMFLOAT4A::operator= 11958( 11959 CONST XMFLOAT4A& Float4 11960) 11961{ 11962 x = Float4.x; 11963 y = Float4.y; 11964 z = Float4.z; 11965 w = Float4.w; 11966 return *this; 11967} 11968 11969/**************************************************************************** 11970 * 11971 * XMHALF4 operators 11972 * 11973 ****************************************************************************/ 11974 11975//------------------------------------------------------------------------------ 11976 11977XMFINLINE _XMHALF4::_XMHALF4 11978( 11979 CONST HALF* pArray 11980) 11981{ 11982 x = pArray[0]; 11983 y = pArray[1]; 11984 z = pArray[2]; 11985 w = pArray[3]; 11986} 11987 11988//------------------------------------------------------------------------------ 11989 11990XMFINLINE _XMHALF4::_XMHALF4 11991( 11992 FLOAT _x, 11993 FLOAT _y, 11994 FLOAT _z, 11995 FLOAT _w 11996) 11997{ 11998 x = XMConvertFloatToHalf(_x); 11999 y = XMConvertFloatToHalf(_y); 12000 z = XMConvertFloatToHalf(_z); 12001 w = XMConvertFloatToHalf(_w); 12002} 12003 12004//------------------------------------------------------------------------------ 12005 12006XMFINLINE _XMHALF4::_XMHALF4 12007( 12008 CONST FLOAT* pArray 12009) 12010{ 12011 XMConvertFloatToHalfStream(&x, sizeof(HALF), pArray, sizeof(FLOAT), 4); 12012} 12013 12014//------------------------------------------------------------------------------ 12015 12016XMFINLINE _XMHALF4& _XMHALF4::operator= 12017( 12018 CONST _XMHALF4& Half4 12019) 12020{ 12021 x = Half4.x; 12022 y = Half4.y; 12023 z = Half4.z; 12024 w = Half4.w; 12025 return *this; 12026} 12027 12028/**************************************************************************** 12029 * 12030 * XMSHORTN4 operators 12031 * 12032 ****************************************************************************/ 12033 12034//------------------------------------------------------------------------------ 12035 12036XMFINLINE _XMSHORTN4::_XMSHORTN4 12037( 12038 CONST SHORT* pArray 12039) 12040{ 12041 x = pArray[0]; 12042 y = pArray[1]; 12043 z = pArray[2]; 12044 w = pArray[3]; 12045} 12046 12047//------------------------------------------------------------------------------ 12048 12049XMFINLINE _XMSHORTN4::_XMSHORTN4 12050( 12051 FLOAT _x, 12052 FLOAT _y, 12053 FLOAT _z, 12054 FLOAT _w 12055) 12056{ 12057 XMStoreShortN4(this, XMVectorSet(_x, _y, _z, _w)); 12058} 12059 12060//------------------------------------------------------------------------------ 12061 12062XMFINLINE _XMSHORTN4::_XMSHORTN4 12063( 12064 CONST FLOAT* pArray 12065) 12066{ 12067 XMStoreShortN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 12068} 12069 12070//------------------------------------------------------------------------------ 12071 12072XMFINLINE _XMSHORTN4& _XMSHORTN4::operator= 12073( 12074 CONST _XMSHORTN4& ShortN4 12075) 12076{ 12077 x = ShortN4.x; 12078 y = ShortN4.y; 12079 z = ShortN4.z; 12080 w = ShortN4.w; 12081 return *this; 12082} 12083 12084/**************************************************************************** 12085 * 12086 * XMSHORT4 operators 12087 * 12088 ****************************************************************************/ 12089 12090//------------------------------------------------------------------------------ 12091 12092XMFINLINE _XMSHORT4::_XMSHORT4 12093( 12094 CONST SHORT* pArray 12095) 12096{ 12097 x = pArray[0]; 12098 y = pArray[1]; 12099 z = pArray[2]; 12100 w = pArray[3]; 
12101} 12102 12103//------------------------------------------------------------------------------ 12104 12105XMFINLINE _XMSHORT4::_XMSHORT4 12106( 12107 FLOAT _x, 12108 FLOAT _y, 12109 FLOAT _z, 12110 FLOAT _w 12111) 12112{ 12113 XMStoreShort4(this, XMVectorSet(_x, _y, _z, _w)); 12114} 12115 12116//------------------------------------------------------------------------------ 12117 12118XMFINLINE _XMSHORT4::_XMSHORT4 12119( 12120 CONST FLOAT* pArray 12121) 12122{ 12123 XMStoreShort4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 12124} 12125 12126//------------------------------------------------------------------------------ 12127 12128XMFINLINE _XMSHORT4& _XMSHORT4::operator= 12129( 12130 CONST _XMSHORT4& Short4 12131) 12132{ 12133 x = Short4.x; 12134 y = Short4.y; 12135 z = Short4.z; 12136 w = Short4.w; 12137 return *this; 12138} 12139 12140/**************************************************************************** 12141 * 12142 * XMUSHORTN4 operators 12143 * 12144 ****************************************************************************/ 12145 12146//------------------------------------------------------------------------------ 12147 12148XMFINLINE _XMUSHORTN4::_XMUSHORTN4 12149( 12150 CONST USHORT* pArray 12151) 12152{ 12153 x = pArray[0]; 12154 y = pArray[1]; 12155 z = pArray[2]; 12156 w = pArray[3]; 12157} 12158 12159//------------------------------------------------------------------------------ 12160 12161XMFINLINE _XMUSHORTN4::_XMUSHORTN4 12162( 12163 FLOAT _x, 12164 FLOAT _y, 12165 FLOAT _z, 12166 FLOAT _w 12167) 12168{ 12169 XMStoreUShortN4(this, XMVectorSet(_x, _y, _z, _w)); 12170} 12171 12172//------------------------------------------------------------------------------ 12173 12174XMFINLINE _XMUSHORTN4::_XMUSHORTN4 12175( 12176 CONST FLOAT* pArray 12177) 12178{ 12179 XMStoreUShortN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 12180} 12181 12182//------------------------------------------------------------------------------ 12183 12184XMFINLINE _XMUSHORTN4& _XMUSHORTN4::operator= 12185( 12186 CONST _XMUSHORTN4& UShortN4 12187) 12188{ 12189 x = UShortN4.x; 12190 y = UShortN4.y; 12191 z = UShortN4.z; 12192 w = UShortN4.w; 12193 return *this; 12194} 12195 12196/**************************************************************************** 12197 * 12198 * XMUSHORT4 operators 12199 * 12200 ****************************************************************************/ 12201 12202//------------------------------------------------------------------------------ 12203 12204XMFINLINE _XMUSHORT4::_XMUSHORT4 12205( 12206 CONST USHORT* pArray 12207) 12208{ 12209 x = pArray[0]; 12210 y = pArray[1]; 12211 z = pArray[2]; 12212 w = pArray[3]; 12213} 12214 12215//------------------------------------------------------------------------------ 12216 12217XMFINLINE _XMUSHORT4::_XMUSHORT4 12218( 12219 FLOAT _x, 12220 FLOAT _y, 12221 FLOAT _z, 12222 FLOAT _w 12223) 12224{ 12225 XMStoreUShort4(this, XMVectorSet(_x, _y, _z, _w)); 12226} 12227 12228//------------------------------------------------------------------------------ 12229 12230XMFINLINE _XMUSHORT4::_XMUSHORT4 12231( 12232 CONST FLOAT* pArray 12233) 12234{ 12235 XMStoreUShort4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 12236} 12237 12238//------------------------------------------------------------------------------ 12239 12240XMFINLINE _XMUSHORT4& _XMUSHORT4::operator= 12241( 12242 CONST _XMUSHORT4& UShort4 12243) 12244{ 12245 x = UShort4.x; 12246 y = UShort4.y; 12247 z = UShort4.z; 12248 w = UShort4.w; 12249 return *this; 12250} 12251 
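//------------------------------------------------------------------------------
// Note (illustrative, assumed semantics of the XMStore* routines): as with the
// two-component variants, XMSHORTN4/XMUSHORTN4 expect inputs in [-1,1] or
// [0,1] and scale them to the full 16-bit range, while XMSHORT4/XMUSHORT4
// round and clamp the raw values. For example:
//
//     _XMUSHORTN4 n(1.0f, 0.5f, 0.0f, 1.0f);        // ~65535, ~32768, 0, 65535
//     _XMUSHORT4  u(1.0f, 70000.0f, 0.0f, 5.0f);    // 1, 65535 (clamped), 0, 5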
12252/**************************************************************************** 12253 * 12254 * XMXDECN4 operators 12255 * 12256 ****************************************************************************/ 12257 12258//------------------------------------------------------------------------------ 12259 12260XMFINLINE _XMXDECN4::_XMXDECN4 12261( 12262 FLOAT _x, 12263 FLOAT _y, 12264 FLOAT _z, 12265 FLOAT _w 12266) 12267{ 12268 XMStoreXDecN4(this, XMVectorSet(_x, _y, _z, _w)); 12269} 12270 12271//------------------------------------------------------------------------------ 12272 12273XMFINLINE _XMXDECN4::_XMXDECN4 12274( 12275 CONST FLOAT* pArray 12276) 12277{ 12278 XMStoreXDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 12279} 12280 12281//------------------------------------------------------------------------------ 12282 12283XMFINLINE _XMXDECN4& _XMXDECN4::operator= 12284( 12285 CONST _XMXDECN4& XDecN4 12286) 12287{ 12288 v = XDecN4.v; 12289 return *this; 12290} 12291 12292//------------------------------------------------------------------------------ 12293 12294XMFINLINE _XMXDECN4& _XMXDECN4::operator= 12295( 12296 CONST UINT Packed 12297) 12298{ 12299 v = Packed; 12300 return *this; 12301} 12302 12303/**************************************************************************** 12304 * 12305 * XMXDEC4 operators 12306 * 12307 ****************************************************************************/ 12308 12309//------------------------------------------------------------------------------ 12310 12311XMFINLINE _XMXDEC4::_XMXDEC4 12312( 12313 FLOAT _x, 12314 FLOAT _y, 12315 FLOAT _z, 12316 FLOAT _w 12317) 12318{ 12319 XMStoreXDec4(this, XMVectorSet(_x, _y, _z, _w)); 12320} 12321 12322//------------------------------------------------------------------------------ 12323 12324XMFINLINE _XMXDEC4::_XMXDEC4 12325( 12326 CONST FLOAT* pArray 12327) 12328{ 12329 XMStoreXDec4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 12330} 12331 12332//------------------------------------------------------------------------------ 12333 12334XMFINLINE _XMXDEC4& _XMXDEC4::operator= 12335( 12336 CONST _XMXDEC4& XDec4 12337) 12338{ 12339 v = XDec4.v; 12340 return *this; 12341} 12342 12343//------------------------------------------------------------------------------ 12344 12345XMFINLINE _XMXDEC4& _XMXDEC4::operator= 12346( 12347 CONST UINT Packed 12348) 12349{ 12350 v = Packed; 12351 return *this; 12352} 12353 12354/**************************************************************************** 12355 * 12356 * XMDECN4 operators 12357 * 12358 ****************************************************************************/ 12359 12360//------------------------------------------------------------------------------ 12361 12362XMFINLINE _XMDECN4::_XMDECN4 12363( 12364 FLOAT _x, 12365 FLOAT _y, 12366 FLOAT _z, 12367 FLOAT _w 12368) 12369{ 12370 XMStoreDecN4(this, XMVectorSet(_x, _y, _z, _w)); 12371} 12372 12373//------------------------------------------------------------------------------ 12374 12375XMFINLINE _XMDECN4::_XMDECN4 12376( 12377 CONST FLOAT* pArray 12378) 12379{ 12380 XMStoreDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 12381} 12382 12383//------------------------------------------------------------------------------ 12384 12385XMFINLINE _XMDECN4& _XMDECN4::operator= 12386( 12387 CONST _XMDECN4& DecN4 12388) 12389{ 12390 v = DecN4.v; 12391 return *this; 12392} 12393 12394//------------------------------------------------------------------------------ 12395 12396XMFINLINE _XMDECN4& _XMDECN4::operator= 12397( 
12398 CONST UINT Packed 12399) 12400{ 12401 v = Packed; 12402 return *this; 12403} 12404 12405/**************************************************************************** 12406 * 12407 * XMDEC4 operators 12408 * 12409 ****************************************************************************/ 12410 12411//------------------------------------------------------------------------------ 12412 12413XMFINLINE _XMDEC4::_XMDEC4 12414( 12415 FLOAT _x, 12416 FLOAT _y, 12417 FLOAT _z, 12418 FLOAT _w 12419) 12420{ 12421 XMStoreDec4(this, XMVectorSet(_x, _y, _z, _w)); 12422} 12423 12424//------------------------------------------------------------------------------ 12425 12426XMFINLINE _XMDEC4::_XMDEC4 12427( 12428 CONST FLOAT* pArray 12429) 12430{ 12431 XMStoreDec4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 12432} 12433 12434//------------------------------------------------------------------------------ 12435 12436XMFINLINE _XMDEC4& _XMDEC4::operator= 12437( 12438 CONST _XMDEC4& Dec4 12439) 12440{ 12441 v = Dec4.v; 12442 return *this; 12443} 12444 12445//------------------------------------------------------------------------------ 12446 12447XMFINLINE _XMDEC4& _XMDEC4::operator= 12448( 12449 CONST UINT Packed 12450) 12451{ 12452 v = Packed; 12453 return *this; 12454} 12455 12456/**************************************************************************** 12457 * 12458 * XMUDECN4 operators 12459 * 12460 ****************************************************************************/ 12461 12462//------------------------------------------------------------------------------ 12463 12464XMFINLINE _XMUDECN4::_XMUDECN4 12465( 12466 FLOAT _x, 12467 FLOAT _y, 12468 FLOAT _z, 12469 FLOAT _w 12470) 12471{ 12472 XMStoreUDecN4(this, XMVectorSet(_x, _y, _z, _w)); 12473} 12474 12475//------------------------------------------------------------------------------ 12476 12477XMFINLINE _XMUDECN4::_XMUDECN4 12478( 12479 CONST FLOAT* pArray 12480) 12481{ 12482 XMStoreUDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 12483} 12484 12485//------------------------------------------------------------------------------ 12486 12487XMFINLINE _XMUDECN4& _XMUDECN4::operator= 12488( 12489 CONST _XMUDECN4& UDecN4 12490) 12491{ 12492 v = UDecN4.v; 12493 return *this; 12494} 12495 12496//------------------------------------------------------------------------------ 12497 12498XMFINLINE _XMUDECN4& _XMUDECN4::operator= 12499( 12500 CONST UINT Packed 12501) 12502{ 12503 v = Packed; 12504 return *this; 12505} 12506 12507/**************************************************************************** 12508 * 12509 * XMUDEC4 operators 12510 * 12511 ****************************************************************************/ 12512 12513//------------------------------------------------------------------------------ 12514 12515XMFINLINE _XMUDEC4::_XMUDEC4 12516( 12517 FLOAT _x, 12518 FLOAT _y, 12519 FLOAT _z, 12520 FLOAT _w 12521) 12522{ 12523 XMStoreUDec4(this, XMVectorSet(_x, _y, _z, _w)); 12524} 12525 12526//------------------------------------------------------------------------------ 12527 12528XMFINLINE _XMUDEC4::_XMUDEC4 12529( 12530 CONST FLOAT* pArray 12531) 12532{ 12533 XMStoreUDec4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 12534} 12535 12536//------------------------------------------------------------------------------ 12537 12538XMFINLINE _XMUDEC4& _XMUDEC4::operator= 12539( 12540 CONST _XMUDEC4& UDec4 12541) 12542{ 12543 v = UDec4.v; 12544 return *this; 12545} 12546 
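//------------------------------------------------------------------------------
// Example (illustrative sketch): _XMUDECN4 above packs four normalized floats
// into an unsigned 10:10:10:2 layout (a common render-target/normal format);
// the raw packed value is available through the `v` member. The values shown
// are hypothetical.
//
//     _XMUDECN4 packed(0.5f, 0.25f, 1.0f, 0.0f);
//     UINT raw = packed.v;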
12547//------------------------------------------------------------------------------ 12548 12549XMFINLINE _XMUDEC4& _XMUDEC4::operator= 12550( 12551 CONST UINT Packed 12552) 12553{ 12554 v = Packed; 12555 return *this; 12556} 12557 12558/**************************************************************************** 12559 * 12560 * XMXICON4 operators 12561 * 12562 ****************************************************************************/ 12563 12564//------------------------------------------------------------------------------ 12565 12566XMFINLINE _XMXICON4::_XMXICON4 12567( 12568 FLOAT _x, 12569 FLOAT _y, 12570 FLOAT _z, 12571 FLOAT _w 12572) 12573{ 12574 XMStoreXIcoN4(this, XMVectorSet(_x, _y, _z, _w)); 12575} 12576 12577//------------------------------------------------------------------------------ 12578 12579XMFINLINE _XMXICON4::_XMXICON4 12580( 12581 CONST FLOAT* pArray 12582) 12583{ 12584 XMStoreXIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 12585} 12586 12587//------------------------------------------------------------------------------ 12588 12589XMFINLINE _XMXICON4& _XMXICON4::operator= 12590( 12591 CONST _XMXICON4& XIcoN4 12592) 12593{ 12594 v = XIcoN4.v; 12595 return *this; 12596} 12597 12598//------------------------------------------------------------------------------ 12599 12600XMFINLINE _XMXICON4& _XMXICON4::operator= 12601( 12602 CONST UINT64 Packed 12603) 12604{ 12605 v = Packed; 12606 return *this; 12607} 12608 12609/**************************************************************************** 12610 * 12611 * XMXICO4 operators 12612 * 12613 ****************************************************************************/ 12614 12615//------------------------------------------------------------------------------ 12616 12617XMFINLINE _XMXICO4::_XMXICO4 12618( 12619 FLOAT _x, 12620 FLOAT _y, 12621 FLOAT _z, 12622 FLOAT _w 12623) 12624{ 12625 XMStoreXIco4(this, XMVectorSet(_x, _y, _z, _w)); 12626} 12627 12628//------------------------------------------------------------------------------ 12629 12630XMFINLINE _XMXICO4::_XMXICO4 12631( 12632 CONST FLOAT* pArray 12633) 12634{ 12635 XMStoreXIco4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 12636} 12637 12638//------------------------------------------------------------------------------ 12639 12640XMFINLINE _XMXICO4& _XMXICO4::operator= 12641( 12642 CONST _XMXICO4& XIco4 12643) 12644{ 12645 v = XIco4.v; 12646 return *this; 12647} 12648 12649//------------------------------------------------------------------------------ 12650 12651XMFINLINE _XMXICO4& _XMXICO4::operator= 12652( 12653 CONST UINT64 Packed 12654) 12655{ 12656 v = Packed; 12657 return *this; 12658} 12659 12660/**************************************************************************** 12661 * 12662 * XMICON4 operators 12663 * 12664 ****************************************************************************/ 12665 12666//------------------------------------------------------------------------------ 12667 12668XMFINLINE _XMICON4::_XMICON4 12669( 12670 FLOAT _x, 12671 FLOAT _y, 12672 FLOAT _z, 12673 FLOAT _w 12674) 12675{ 12676 XMStoreIcoN4(this, XMVectorSet(_x, _y, _z, _w)); 12677} 12678 12679//------------------------------------------------------------------------------ 12680 12681XMFINLINE _XMICON4::_XMICON4 12682( 12683 CONST FLOAT* pArray 12684) 12685{ 12686 XMStoreIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 12687} 12688 12689//------------------------------------------------------------------------------ 12690 12691XMFINLINE _XMICON4& _XMICON4::operator= 12692( 
12693 CONST _XMICON4& IcoN4 12694) 12695{ 12696 v = IcoN4.v; 12697 return *this; 12698} 12699 12700//------------------------------------------------------------------------------ 12701 12702XMFINLINE _XMICON4& _XMICON4::operator= 12703( 12704 CONST UINT64 Packed 12705) 12706{ 12707 v = Packed; 12708 return *this; 12709} 12710 12711/**************************************************************************** 12712 * 12713 * XMICO4 operators 12714 * 12715 ****************************************************************************/ 12716 12717//------------------------------------------------------------------------------ 12718 12719XMFINLINE _XMICO4::_XMICO4 12720( 12721 FLOAT _x, 12722 FLOAT _y, 12723 FLOAT _z, 12724 FLOAT _w 12725) 12726{ 12727 XMStoreIco4(this, XMVectorSet(_x, _y, _z, _w)); 12728} 12729 12730//------------------------------------------------------------------------------ 12731 12732XMFINLINE _XMICO4::_XMICO4 12733( 12734 CONST FLOAT* pArray 12735) 12736{ 12737 XMStoreIco4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 12738} 12739 12740//------------------------------------------------------------------------------ 12741 12742XMFINLINE _XMICO4& _XMICO4::operator= 12743( 12744 CONST _XMICO4& Ico4 12745) 12746{ 12747 v = Ico4.v; 12748 return *this; 12749} 12750 12751//------------------------------------------------------------------------------ 12752 12753XMFINLINE _XMICO4& _XMICO4::operator= 12754( 12755 CONST UINT64 Packed 12756) 12757{ 12758 v = Packed; 12759 return *this; 12760} 12761 12762/**************************************************************************** 12763 * 12764 * XMUICON4 operators 12765 * 12766 ****************************************************************************/ 12767 12768//------------------------------------------------------------------------------ 12769 12770XMFINLINE _XMUICON4::_XMUICON4 12771( 12772 FLOAT _x, 12773 FLOAT _y, 12774 FLOAT _z, 12775 FLOAT _w 12776) 12777{ 12778 XMStoreUIcoN4(this, XMVectorSet(_x, _y, _z, _w)); 12779} 12780 12781//------------------------------------------------------------------------------ 12782 12783XMFINLINE _XMUICON4::_XMUICON4 12784( 12785 CONST FLOAT* pArray 12786) 12787{ 12788 XMStoreUIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 12789} 12790 12791//------------------------------------------------------------------------------ 12792 12793XMFINLINE _XMUICON4& _XMUICON4::operator= 12794( 12795 CONST _XMUICON4& UIcoN4 12796) 12797{ 12798 v = UIcoN4.v; 12799 return *this; 12800} 12801 12802//------------------------------------------------------------------------------ 12803 12804XMFINLINE _XMUICON4& _XMUICON4::operator= 12805( 12806 CONST UINT64 Packed 12807) 12808{ 12809 v = Packed; 12810 return *this; 12811} 12812 12813/**************************************************************************** 12814 * 12815 * XMUICO4 operators 12816 * 12817 ****************************************************************************/ 12818 12819//------------------------------------------------------------------------------ 12820 12821XMFINLINE _XMUICO4::_XMUICO4 12822( 12823 FLOAT _x, 12824 FLOAT _y, 12825 FLOAT _z, 12826 FLOAT _w 12827) 12828{ 12829 XMStoreUIco4(this, XMVectorSet(_x, _y, _z, _w)); 12830} 12831 12832//------------------------------------------------------------------------------ 12833 12834XMFINLINE _XMUICO4::_XMUICO4 12835( 12836 CONST FLOAT* pArray 12837) 12838{ 12839 XMStoreUIco4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 12840} 12841 
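//------------------------------------------------------------------------------
// Example (illustrative sketch): the Ico family uses a 64-bit packed
// representation, so its packed assignment takes a UINT64 rather than a UINT.
// The values below are hypothetical.
//
//     _XMUICON4 p(0.25f, 0.5f, 0.75f, 1.0f);
//     UINT64 bits = p.v;   // raw 64-bit packed value
//     p = bits;            // reassign from the packed form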
12842//------------------------------------------------------------------------------ 12843 12844XMFINLINE _XMUICO4& _XMUICO4::operator= 12845( 12846 CONST _XMUICO4& UIco4 12847) 12848{ 12849 v = UIco4.v; 12850 return *this; 12851} 12852 12853//------------------------------------------------------------------------------ 12854 12855XMFINLINE _XMUICO4& _XMUICO4::operator= 12856( 12857 CONST UINT64 Packed 12858) 12859{ 12860 v = Packed; 12861 return *this; 12862} 12863 12864/**************************************************************************** 12865 * 12866 * XMCOLOR4 operators 12867 * 12868 ****************************************************************************/ 12869 12870//------------------------------------------------------------------------------ 12871 12872XMFINLINE _XMCOLOR::_XMCOLOR 12873( 12874 FLOAT _r, 12875 FLOAT _g, 12876 FLOAT _b, 12877 FLOAT _a 12878) 12879{ 12880 XMStoreColor(this, XMVectorSet(_r, _g, _b, _a)); 12881} 12882 12883//------------------------------------------------------------------------------ 12884 12885XMFINLINE _XMCOLOR::_XMCOLOR 12886( 12887 CONST FLOAT* pArray 12888) 12889{ 12890 XMStoreColor(this, XMLoadFloat4((XMFLOAT4*)pArray)); 12891} 12892 12893//------------------------------------------------------------------------------ 12894 12895XMFINLINE _XMCOLOR& _XMCOLOR::operator= 12896( 12897 CONST _XMCOLOR& Color 12898) 12899{ 12900 c = Color.c; 12901 return *this; 12902} 12903 12904//------------------------------------------------------------------------------ 12905 12906XMFINLINE _XMCOLOR& _XMCOLOR::operator= 12907( 12908 CONST UINT Color 12909) 12910{ 12911 c = Color; 12912 return *this; 12913} 12914 12915/**************************************************************************** 12916 * 12917 * XMBYTEN4 operators 12918 * 12919 ****************************************************************************/ 12920 12921//------------------------------------------------------------------------------ 12922 12923XMFINLINE _XMBYTEN4::_XMBYTEN4 12924( 12925 CONST CHAR* pArray 12926) 12927{ 12928 x = pArray[0]; 12929 y = pArray[1]; 12930 z = pArray[2]; 12931 w = pArray[3]; 12932} 12933 12934//------------------------------------------------------------------------------ 12935 12936XMFINLINE _XMBYTEN4::_XMBYTEN4 12937( 12938 FLOAT _x, 12939 FLOAT _y, 12940 FLOAT _z, 12941 FLOAT _w 12942) 12943{ 12944 XMStoreByteN4(this, XMVectorSet(_x, _y, _z, _w)); 12945} 12946 12947//------------------------------------------------------------------------------ 12948 12949XMFINLINE _XMBYTEN4::_XMBYTEN4 12950( 12951 CONST FLOAT* pArray 12952) 12953{ 12954 XMStoreByteN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 12955} 12956 12957//------------------------------------------------------------------------------ 12958 12959XMFINLINE _XMBYTEN4& _XMBYTEN4::operator= 12960( 12961 CONST _XMBYTEN4& ByteN4 12962) 12963{ 12964 x = ByteN4.x; 12965 y = ByteN4.y; 12966 z = ByteN4.z; 12967 w = ByteN4.w; 12968 return *this; 12969} 12970 12971/**************************************************************************** 12972 * 12973 * XMBYTE4 operators 12974 * 12975 ****************************************************************************/ 12976 12977//------------------------------------------------------------------------------ 12978 12979XMFINLINE _XMBYTE4::_XMBYTE4 12980( 12981 CONST CHAR* pArray 12982) 12983{ 12984 x = pArray[0]; 12985 y = pArray[1]; 12986 z = pArray[2]; 12987 w = pArray[3]; 12988} 12989 
12990//------------------------------------------------------------------------------ 12991 12992XMFINLINE _XMBYTE4::_XMBYTE4 12993( 12994 FLOAT _x, 12995 FLOAT _y, 12996 FLOAT _z, 12997 FLOAT _w 12998) 12999{ 13000 XMStoreByte4(this, XMVectorSet(_x, _y, _z, _w)); 13001} 13002 13003//------------------------------------------------------------------------------ 13004 13005XMFINLINE _XMBYTE4::_XMBYTE4 13006( 13007 CONST FLOAT* pArray 13008) 13009{ 13010 XMStoreByte4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 13011} 13012 13013//------------------------------------------------------------------------------ 13014 13015XMFINLINE _XMBYTE4& _XMBYTE4::operator= 13016( 13017 CONST _XMBYTE4& Byte4 13018) 13019{ 13020 x = Byte4.x; 13021 y = Byte4.y; 13022 z = Byte4.z; 13023 w = Byte4.w; 13024 return *this; 13025} 13026 13027/**************************************************************************** 13028 * 13029 * XMUBYTEN4 operators 13030 * 13031 ****************************************************************************/ 13032 13033//------------------------------------------------------------------------------ 13034 13035XMFINLINE _XMUBYTEN4::_XMUBYTEN4 13036( 13037 CONST BYTE* pArray 13038) 13039{ 13040 x = pArray[0]; 13041 y = pArray[1]; 13042 z = pArray[2]; 13043 w = pArray[3]; 13044} 13045 13046//------------------------------------------------------------------------------ 13047 13048XMFINLINE _XMUBYTEN4::_XMUBYTEN4 13049( 13050 FLOAT _x, 13051 FLOAT _y, 13052 FLOAT _z, 13053 FLOAT _w 13054) 13055{ 13056 XMStoreUByteN4(this, XMVectorSet(_x, _y, _z, _w)); 13057} 13058 13059//------------------------------------------------------------------------------ 13060 13061XMFINLINE _XMUBYTEN4::_XMUBYTEN4 13062( 13063 CONST FLOAT* pArray 13064) 13065{ 13066 XMStoreUByteN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 13067} 13068 13069//------------------------------------------------------------------------------ 13070 13071XMFINLINE _XMUBYTEN4& _XMUBYTEN4::operator= 13072( 13073 CONST _XMUBYTEN4& UByteN4 13074) 13075{ 13076 x = UByteN4.x; 13077 y = UByteN4.y; 13078 z = UByteN4.z; 13079 w = UByteN4.w; 13080 return *this; 13081} 13082 13083/**************************************************************************** 13084 * 13085 * XMUBYTE4 operators 13086 * 13087 ****************************************************************************/ 13088 13089//------------------------------------------------------------------------------ 13090 13091XMFINLINE _XMUBYTE4::_XMUBYTE4 13092( 13093 CONST BYTE* pArray 13094) 13095{ 13096 x = pArray[0]; 13097 y = pArray[1]; 13098 z = pArray[2]; 13099 w = pArray[3]; 13100} 13101 13102//------------------------------------------------------------------------------ 13103 13104XMFINLINE _XMUBYTE4::_XMUBYTE4 13105( 13106 FLOAT _x, 13107 FLOAT _y, 13108 FLOAT _z, 13109 FLOAT _w 13110) 13111{ 13112 XMStoreUByte4(this, XMVectorSet(_x, _y, _z, _w)); 13113} 13114 13115//------------------------------------------------------------------------------ 13116 13117XMFINLINE _XMUBYTE4::_XMUBYTE4 13118( 13119 CONST FLOAT* pArray 13120) 13121{ 13122 XMStoreUByte4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 13123} 13124 13125//------------------------------------------------------------------------------ 13126 13127XMFINLINE _XMUBYTE4& _XMUBYTE4::operator= 13128( 13129 CONST _XMUBYTE4& UByte4 13130) 13131{ 13132 x = UByte4.x; 13133 y = UByte4.y; 13134 z = UByte4.z; 13135 w = UByte4.w; 13136 return *this; 13137} 13138 
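//------------------------------------------------------------------------------
// Note (illustrative, assumed byte ordering): both _XMCOLOR and _XMUBYTEN4
// pack four normalized floats into 32 bits, but _XMCOLOR stores them in ARGB
// order via XMStoreColor while _XMUBYTEN4 keeps component order x, y, z, w.
// For example:
//
//     _XMCOLOR   c(1.0f, 0.0f, 0.0f, 1.0f);  // opaque red, c == 0xFFFF0000
//     _XMUBYTEN4 b(1.0f, 0.0f, 0.0f, 1.0f);  // x=255, y=0, z=0, w=255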
13139/**************************************************************************** 13140 * 13141 * XMUNIBBLE4 operators 13142 * 13143 ****************************************************************************/ 13144 13145//------------------------------------------------------------------------------ 13146 13147XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4 13148( 13149 CONST CHAR *pArray 13150) 13151{ 13152 x = pArray[0]; 13153 y = pArray[1]; 13154 z = pArray[2]; 13155 w = pArray[3]; 13156} 13157 13158//------------------------------------------------------------------------------ 13159 13160XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4 13161( 13162 FLOAT _x, 13163 FLOAT _y, 13164 FLOAT _z, 13165 FLOAT _w 13166) 13167{ 13168 XMStoreUNibble4(this, XMVectorSet( _x, _y, _z, _w )); 13169} 13170 13171//------------------------------------------------------------------------------ 13172 13173XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4 13174( 13175 CONST FLOAT *pArray 13176) 13177{ 13178 XMStoreUNibble4(this, XMLoadFloat4((XMFLOAT4*)pArray)); 13179} 13180 13181//------------------------------------------------------------------------------ 13182 13183XMFINLINE _XMUNIBBLE4& _XMUNIBBLE4::operator= 13184( 13185 CONST _XMUNIBBLE4& UNibble4 13186) 13187{ 13188 v = UNibble4.v; 13189 return *this; 13190} 13191 13192//------------------------------------------------------------------------------ 13193 13194XMFINLINE _XMUNIBBLE4& _XMUNIBBLE4::operator= 13195( 13196 CONST USHORT Packed 13197) 13198{ 13199 v = Packed; 13200 return *this; 13201} 13202 13203/**************************************************************************** 13204 * 13205 * XMU555 operators 13206 * 13207 ****************************************************************************/ 13208 13209//------------------------------------------------------------------------------ 13210 13211XMFINLINE _XMU555::_XMU555 13212( 13213 CONST CHAR *pArray, 13214 BOOL _w 13215) 13216{ 13217 x = pArray[0]; 13218 y = pArray[1]; 13219 z = pArray[2]; 13220 w = _w; 13221} 13222 13223//------------------------------------------------------------------------------ 13224 13225XMFINLINE _XMU555::_XMU555 13226( 13227 FLOAT _x, 13228 FLOAT _y, 13229 FLOAT _z, 13230 BOOL _w 13231) 13232{ 13233 XMStoreU555(this, XMVectorSet(_x, _y, _z, ((_w) ? 1.0f : 0.0f) )); 13234} 13235 13236//------------------------------------------------------------------------------ 13237 13238XMFINLINE _XMU555::_XMU555 13239( 13240 CONST FLOAT *pArray, 13241 BOOL _w 13242) 13243{ 13244 XMVECTOR V = XMLoadFloat3((XMFLOAT3*)pArray); 13245 XMStoreU555(this, XMVectorSetW(V, ((_w) ? 1.0f : 0.0f) )); 13246} 13247 13248//------------------------------------------------------------------------------ 13249 13250XMFINLINE _XMU555& _XMU555::operator= 13251( 13252 CONST _XMU555& U555 13253) 13254{ 13255 v = U555.v; 13256 return *this; 13257} 13258 13259//------------------------------------------------------------------------------ 13260 13261XMFINLINE _XMU555& _XMU555::operator= 13262( 13263 CONST USHORT Packed 13264) 13265{ 13266 v = Packed; 13267 return *this; 13268} 13269 13270#endif // __cplusplus 13271 13272#if defined(_XM_NO_INTRINSICS_) 13273#undef XMISNAN 13274#undef XMISINF 13275#endif 13276 13277#endif // __XNAMATHVECTOR_INL__ 13278