1/*++
2
3Copyright (c) Microsoft Corporation. All rights reserved.
4
5Module Name:
6
7    xnamathvector.inl
8
9Abstract:
10
11	XNA math library for Windows and Xbox 360: Vector functions
12--*/
13
14#if defined(_MSC_VER) && (_MSC_VER > 1000)
15#pragma once
16#endif
17
18#ifndef __XNAMATHVECTOR_INL__
19#define __XNAMATHVECTOR_INL__
20
21#if defined(_XM_NO_INTRINSICS_)
22#define XMISNAN(x)  ((*(UINT*)&(x) & 0x7F800000) == 0x7F800000 && (*(UINT*)&(x) & 0x7FFFFF) != 0)
23#define XMISINF(x)  ((*(UINT*)&(x) & 0x7FFFFFFF) == 0x7F800000)
24#endif
25
26/****************************************************************************
27 *
28 * General Vector
29 *
30 ****************************************************************************/
31
32//------------------------------------------------------------------------------
33// Assignment operations
34//------------------------------------------------------------------------------
35
36//------------------------------------------------------------------------------
37// Return a vector with all elements equaling zero
38XMFINLINE XMVECTOR XMVectorZero()
39{
40#if defined(_XM_NO_INTRINSICS_)
41    XMVECTOR vResult = {0.0f,0.0f,0.0f,0.0f};
42    return vResult;
43#elif defined(_XM_SSE_INTRINSICS_)
44    return _mm_setzero_ps();
45#else // _XM_VMX128_INTRINSICS_
46#endif // _XM_VMX128_INTRINSICS_
47}
48
49//------------------------------------------------------------------------------
50// Initialize a vector with four floating point values
51XMFINLINE XMVECTOR XMVectorSet
52(
53    FLOAT x,
54    FLOAT y,
55    FLOAT z,
56    FLOAT w
57)
58{
59#if defined(_XM_NO_INTRINSICS_)
60    XMVECTORF32 vResult = {x,y,z,w};
61    return vResult.v;
62#elif defined(_XM_SSE_INTRINSICS_)
63    return _mm_set_ps( w, z, y, x );
64#else // _XM_VMX128_INTRINSICS_
65#endif // _XM_VMX128_INTRINSICS_
66}
67
68//------------------------------------------------------------------------------
69// Initialize a vector with four integer values
70XMFINLINE XMVECTOR XMVectorSetInt
71(
72    UINT x,
73    UINT y,
74    UINT z,
75    UINT w
76)
77{
78#if defined(_XM_NO_INTRINSICS_)
79    XMVECTORU32 vResult = {x,y,z,w};
80    return vResult.v;
81#elif defined(_XM_SSE_INTRINSICS_)
82    __m128i V = _mm_set_epi32( w, z, y, x );
83    return reinterpret_cast<__m128 *>(&V)[0];
84#else // _XM_VMX128_INTRINSICS_
85#endif // _XM_VMX128_INTRINSICS_
86}
87
88//------------------------------------------------------------------------------
89// Initialize a vector with a replicated floating point value
90XMFINLINE XMVECTOR XMVectorReplicate
91(
92    FLOAT Value
93)
94{
95#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
96    XMVECTORF32 vResult = {Value,Value,Value,Value};
97    return vResult.v;
98#elif defined(_XM_SSE_INTRINSICS_)
99    return _mm_set_ps1( Value );
100#else // _XM_VMX128_INTRINSICS_
101#endif // _XM_VMX128_INTRINSICS_
102}
103
104//------------------------------------------------------------------------------
105// Initialize a vector with a replicated floating point value passed by pointer
106XMFINLINE XMVECTOR XMVectorReplicatePtr
107(
108    CONST FLOAT *pValue
109)
110{
111#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
112    FLOAT Value = pValue[0];
113    XMVECTORF32 vResult = {Value,Value,Value,Value};
114    return vResult.v;
115#elif defined(_XM_SSE_INTRINSICS_)
116    return _mm_load_ps1( pValue );
117#else // _XM_VMX128_INTRINSICS_
118#endif // _XM_VMX128_INTRINSICS_
119}
120
121//------------------------------------------------------------------------------
122// Initialize a vector with a replicated integer value
123XMFINLINE XMVECTOR XMVectorReplicateInt
124(
125    UINT Value
126)
127{
128#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
129    XMVECTORU32 vResult = {Value,Value,Value,Value};
130    return vResult.v;
131#elif defined(_XM_SSE_INTRINSICS_)
132    __m128i vTemp = _mm_set1_epi32( Value );
133    return reinterpret_cast<const __m128 *>(&vTemp)[0];
134#else // _XM_VMX128_INTRINSICS_
135#endif // _XM_VMX128_INTRINSICS_
136}
137
138//------------------------------------------------------------------------------
139// Initialize a vector with a replicated integer value passed by pointer
140XMFINLINE XMVECTOR XMVectorReplicateIntPtr
141(
142    CONST UINT *pValue
143)
144{
145#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
146    UINT Value = pValue[0];
147    XMVECTORU32 vResult = {Value,Value,Value,Value};
148    return vResult.v;
149#elif defined(_XM_SSE_INTRINSICS_)
150    return _mm_load_ps1(reinterpret_cast<const float *>(pValue));
151#else // _XM_VMX128_INTRINSICS_
152#endif // _XM_VMX128_INTRINSICS_
153}
154
155//------------------------------------------------------------------------------
156// Initialize a vector with all bits set (true mask)
157XMFINLINE XMVECTOR XMVectorTrueInt()
158{
159#if defined(_XM_NO_INTRINSICS_)
160    XMVECTORU32 vResult = {0xFFFFFFFFU,0xFFFFFFFFU,0xFFFFFFFFU,0xFFFFFFFFU};
161    return vResult.v;
162#elif defined(_XM_SSE_INTRINSICS_)
163    __m128i V = _mm_set1_epi32(-1);
164    return reinterpret_cast<__m128 *>(&V)[0];
165#else // _XM_VMX128_INTRINSICS_
166#endif // _XM_VMX128_INTRINSICS_
167}
168
169//------------------------------------------------------------------------------
170// Initialize a vector with all bits clear (false mask)
171XMFINLINE XMVECTOR XMVectorFalseInt()
172{
173#if defined(_XM_NO_INTRINSICS_)
174    XMVECTOR vResult = {0.0f,0.0f,0.0f,0.0f};
175    return vResult;
176#elif defined(_XM_SSE_INTRINSICS_)
177    return _mm_setzero_ps();
178#else // _XM_VMX128_INTRINSICS_
179#endif // _XM_VMX128_INTRINSICS_
180}
181
182//------------------------------------------------------------------------------
183// Replicate the x component of the vector
184XMFINLINE XMVECTOR XMVectorSplatX
185(
186    FXMVECTOR V
187)
188{
189#if defined(_XM_NO_INTRINSICS_)
190    XMVECTOR vResult;
191    vResult.vector4_f32[0] =
192    vResult.vector4_f32[1] =
193    vResult.vector4_f32[2] =
194    vResult.vector4_f32[3] = V.vector4_f32[0];
195    return vResult;
196#elif defined(_XM_SSE_INTRINSICS_)
197    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(0, 0, 0, 0) );
198#else // _XM_VMX128_INTRINSICS_
199#endif // _XM_VMX128_INTRINSICS_
200}
201
202//------------------------------------------------------------------------------
203// Replicate the y component of the vector
204XMFINLINE XMVECTOR XMVectorSplatY
205(
206    FXMVECTOR V
207)
208{
209#if defined(_XM_NO_INTRINSICS_)
210    XMVECTOR vResult;
211    vResult.vector4_f32[0] =
212    vResult.vector4_f32[1] =
213    vResult.vector4_f32[2] =
214    vResult.vector4_f32[3] = V.vector4_f32[1];
215    return vResult;
216#elif defined(_XM_SSE_INTRINSICS_)
217    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(1, 1, 1, 1) );
218#else // _XM_VMX128_INTRINSICS_
219#endif // _XM_VMX128_INTRINSICS_
220}
221
222//------------------------------------------------------------------------------
223// Replicate the z component of the vector
224XMFINLINE XMVECTOR XMVectorSplatZ
225(
226    FXMVECTOR V
227)
228{
229#if defined(_XM_NO_INTRINSICS_)
230    XMVECTOR vResult;
231    vResult.vector4_f32[0] =
232    vResult.vector4_f32[1] =
233    vResult.vector4_f32[2] =
234    vResult.vector4_f32[3] = V.vector4_f32[2];
235    return vResult;
236#elif defined(_XM_SSE_INTRINSICS_)
237    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(2, 2, 2, 2) );
238#else // _XM_VMX128_INTRINSICS_
239#endif // _XM_VMX128_INTRINSICS_
240}
241
242//------------------------------------------------------------------------------
243// Replicate the w component of the vector
244XMFINLINE XMVECTOR XMVectorSplatW
245(
246    FXMVECTOR V
247)
248{
249#if defined(_XM_NO_INTRINSICS_)
250    XMVECTOR vResult;
251    vResult.vector4_f32[0] =
252    vResult.vector4_f32[1] =
253    vResult.vector4_f32[2] =
254    vResult.vector4_f32[3] = V.vector4_f32[3];
255    return vResult;
256#elif defined(_XM_SSE_INTRINSICS_)
257    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(3, 3, 3, 3) );
258#else // _XM_VMX128_INTRINSICS_
259#endif // _XM_VMX128_INTRINSICS_
260}
261
262//------------------------------------------------------------------------------
263// Return a vector of 1.0f,1.0f,1.0f,1.0f
264XMFINLINE XMVECTOR XMVectorSplatOne()
265{
266#if defined(_XM_NO_INTRINSICS_)
267    XMVECTOR vResult;
268    vResult.vector4_f32[0] =
269    vResult.vector4_f32[1] =
270    vResult.vector4_f32[2] =
271    vResult.vector4_f32[3] = 1.0f;
272    return vResult;
273#elif defined(_XM_SSE_INTRINSICS_)
274    return g_XMOne;
275#else //  _XM_VMX128_INTRINSICS_
276#endif // _XM_VMX128_INTRINSICS_
277}
278
279//------------------------------------------------------------------------------
280// Return a vector of INF,INF,INF,INF
281XMFINLINE XMVECTOR XMVectorSplatInfinity()
282{
283#if defined(_XM_NO_INTRINSICS_)
284    XMVECTOR vResult;
285    vResult.vector4_u32[0] =
286    vResult.vector4_u32[1] =
287    vResult.vector4_u32[2] =
288    vResult.vector4_u32[3] = 0x7F800000;
289    return vResult;
290#elif defined(_XM_SSE_INTRINSICS_)
291    return g_XMInfinity;
292#else // _XM_VMX128_INTRINSICS_
293#endif // _XM_VMX128_INTRINSICS_
294}
295
296//------------------------------------------------------------------------------
297// Return a vector of Q_NAN,Q_NAN,Q_NAN,Q_NAN
298XMFINLINE XMVECTOR XMVectorSplatQNaN()
299{
300#if defined(_XM_NO_INTRINSICS_)
301    XMVECTOR vResult;
302    vResult.vector4_u32[0] =
303    vResult.vector4_u32[1] =
304    vResult.vector4_u32[2] =
305    vResult.vector4_u32[3] = 0x7FC00000;
306    return vResult;
307#elif defined(_XM_SSE_INTRINSICS_)
308    return g_XMQNaN;
309#else // _XM_VMX128_INTRINSICS_
310#endif // _XM_VMX128_INTRINSICS_
311}
312
313//------------------------------------------------------------------------------
314// Return a vector of 1.192092896e-7f,1.192092896e-7f,1.192092896e-7f,1.192092896e-7f
315XMFINLINE XMVECTOR XMVectorSplatEpsilon()
316{
317#if defined(_XM_NO_INTRINSICS_)
318    XMVECTOR vResult;
319    vResult.vector4_u32[0] =
320    vResult.vector4_u32[1] =
321    vResult.vector4_u32[2] =
322    vResult.vector4_u32[3] = 0x34000000;
323    return vResult;
324#elif defined(_XM_SSE_INTRINSICS_)
325    return g_XMEpsilon;
326#else // _XM_VMX128_INTRINSICS_
327#endif // _XM_VMX128_INTRINSICS_
328}
329
330//------------------------------------------------------------------------------
331// Return a vector of -0.0f (0x80000000),-0.0f,-0.0f,-0.0f
332XMFINLINE XMVECTOR XMVectorSplatSignMask()
333{
334#if defined(_XM_NO_INTRINSICS_)
335    XMVECTOR vResult;
336    vResult.vector4_u32[0] =
337    vResult.vector4_u32[1] =
338    vResult.vector4_u32[2] =
339    vResult.vector4_u32[3] = 0x80000000U;
340    return vResult;
341#elif defined(_XM_SSE_INTRINSICS_)
342    __m128i V = _mm_set1_epi32( 0x80000000 );
343    return reinterpret_cast<__m128*>(&V)[0];
344#else // _XM_VMX128_INTRINSICS_
345#endif // _XM_VMX128_INTRINSICS_
346}
347
348//------------------------------------------------------------------------------
349// Return a floating point value via an index. This is not a recommended
350// function to use due to performance loss.
351XMFINLINE FLOAT XMVectorGetByIndex(FXMVECTOR V,UINT i)
352{
353    XMASSERT( i <= 3 );
354#if defined(_XM_NO_INTRINSICS_)
355    return V.vector4_f32[i];
356#elif defined(_XM_SSE_INTRINSICS_)
357    return V.m128_f32[i];
358#else // _XM_VMX128_INTRINSICS_
359#endif // _XM_VMX128_INTRINSICS_
360}
361
362//------------------------------------------------------------------------------
363// Return the X component in an FPU register.
364// This causes Load/Hit/Store on VMX targets
365XMFINLINE FLOAT XMVectorGetX(FXMVECTOR V)
366{
367#if defined(_XM_NO_INTRINSICS_)
368    return V.vector4_f32[0];
369#elif defined(_XM_SSE_INTRINSICS_)
370#if defined(_MSC_VER) && (_MSC_VER>=1500)
371    return _mm_cvtss_f32(V);
372#else
373    return V.m128_f32[0];
374#endif
375#else // _XM_VMX128_INTRINSICS_
376#endif // _XM_VMX128_INTRINSICS_
377}
378
379// Return the Y component in an FPU register.
380// This causes Load/Hit/Store on VMX targets
381XMFINLINE FLOAT XMVectorGetY(FXMVECTOR V)
382{
383#if defined(_XM_NO_INTRINSICS_)
384    return V.vector4_f32[1];
385#elif defined(_XM_SSE_INTRINSICS_)
386#if defined(_MSC_VER) && (_MSC_VER>=1500)
387    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
388    return _mm_cvtss_f32(vTemp);
389#else
390    return V.m128_f32[1];
391#endif
392#else // _XM_VMX128_INTRINSICS_
393#endif // _XM_VMX128_INTRINSICS_
394}
395
396// Return the Z component in an FPU register.
397// This causes Load/Hit/Store on VMX targets
398XMFINLINE FLOAT XMVectorGetZ(FXMVECTOR V)
399{
400#if defined(_XM_NO_INTRINSICS_)
401    return V.vector4_f32[2];
402#elif defined(_XM_SSE_INTRINSICS_)
403#if defined(_MSC_VER) && (_MSC_VER>=1500)
404    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
405    return _mm_cvtss_f32(vTemp);
406#else
407    return V.m128_f32[2];
408#endif
409#else // _XM_VMX128_INTRINSICS_
410#endif // _XM_VMX128_INTRINSICS_
411}
412
413// Return the W component in an FPU register.
414// This causes Load/Hit/Store on VMX targets
415XMFINLINE FLOAT XMVectorGetW(FXMVECTOR V)
416{
417#if defined(_XM_NO_INTRINSICS_)
418    return V.vector4_f32[3];
419#elif defined(_XM_SSE_INTRINSICS_)
420#if defined(_MSC_VER) && (_MSC_VER>=1500)
421    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
422    return _mm_cvtss_f32(vTemp);
423#else
424    return V.m128_f32[3];
425#endif
426#else // _XM_VMX128_INTRINSICS_
427#endif // _XM_VMX128_INTRINSICS_
428}
429
430//------------------------------------------------------------------------------
431
432// Store a component indexed by i into a 32 bit float location in memory.
433// This causes Load/Hit/Store on VMX targets
434XMFINLINE VOID XMVectorGetByIndexPtr(FLOAT *f,FXMVECTOR V,UINT i)
435{
436    XMASSERT( f != 0 );
437    XMASSERT( i <  4 );
438#if defined(_XM_NO_INTRINSICS_)
439    *f = V.vector4_f32[i];
440#elif defined(_XM_SSE_INTRINSICS_)
441    *f = V.m128_f32[i];
442#else // _XM_VMX128_INTRINSICS_
443#endif // _XM_VMX128_INTRINSICS_
444}
445
446//------------------------------------------------------------------------------
447
448// Store the X component into a 32 bit float location in memory.
449XMFINLINE VOID XMVectorGetXPtr(FLOAT *x,FXMVECTOR V)
450{
451    XMASSERT( x != 0 );
452#if defined(_XM_NO_INTRINSICS_)
453    *x = V.vector4_f32[0];
454#elif defined(_XM_SSE_INTRINSICS_)
455    _mm_store_ss(x,V);
456#else // _XM_VMX128_INTRINSICS_
457#endif // _XM_VMX128_INTRINSICS_
458}
459
460// Store the Y component into a 32 bit float location in memory.
461XMFINLINE VOID XMVectorGetYPtr(FLOAT *y,FXMVECTOR V)
462{
463    XMASSERT( y != 0 );
464#if defined(_XM_NO_INTRINSICS_)
465    *y = V.vector4_f32[1];
466#elif defined(_XM_SSE_INTRINSICS_)
467    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
468    _mm_store_ss(y,vResult);
469#else // _XM_VMX128_INTRINSICS_
470#endif // _XM_VMX128_INTRINSICS_
471}
472
473// Store the Z component into a 32 bit float location in memory.
474XMFINLINE VOID XMVectorGetZPtr(FLOAT *z,FXMVECTOR V)
475{
476    XMASSERT( z != 0 );
477#if defined(_XM_NO_INTRINSICS_)
478    *z = V.vector4_f32[2];
479#elif defined(_XM_SSE_INTRINSICS_)
480    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
481    _mm_store_ss(z,vResult);
482#else // _XM_VMX128_INTRINSICS_
483#endif // _XM_VMX128_INTRINSICS_
484}
485
486// Store the W component into a 32 bit float location in memory.
487XMFINLINE VOID XMVectorGetWPtr(FLOAT *w,FXMVECTOR V)
488{
489    XMASSERT( w != 0 );
490#if defined(_XM_NO_INTRINSICS_)
491    *w = V.vector4_f32[3];
492#elif defined(_XM_SSE_INTRINSICS_)
493    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
494    _mm_store_ss(w,vResult);
495#else // _XM_VMX128_INTRINSICS_
496#endif // _XM_VMX128_INTRINSICS_
497}
498
499//------------------------------------------------------------------------------
500
501// Return an integer value via an index. This is not a recommended
502// function to use due to performance loss.
503XMFINLINE UINT XMVectorGetIntByIndex(FXMVECTOR V, UINT i)
504{
505    XMASSERT( i < 4 );
506#if defined(_XM_NO_INTRINSICS_)
507    return V.vector4_u32[i];
508#elif defined(_XM_SSE_INTRINSICS_)
509#if defined(_MSC_VER) && (_MSC_VER<1400)
510    XMVECTORU32 tmp;
511    tmp.v = V;
512    return tmp.u[i];
513#else
514    return V.m128_u32[i];
515#endif
516#else // _XM_VMX128_INTRINSICS_
517#endif // _XM_VMX128_INTRINSICS_
518}
519
520//------------------------------------------------------------------------------
521
522// Return the X component in an integer register.
523// This causes Load/Hit/Store on VMX targets
524XMFINLINE UINT XMVectorGetIntX(FXMVECTOR V)
525{
526#if defined(_XM_NO_INTRINSICS_)
527    return V.vector4_u32[0];
528#elif defined(_XM_SSE_INTRINSICS_)
529    return static_cast<UINT>(_mm_cvtsi128_si32(reinterpret_cast<const __m128i *>(&V)[0]));
530#else // _XM_VMX128_INTRINSICS_
531#endif // _XM_VMX128_INTRINSICS_
532}
533
534// Return the Y component in an integer register.
535// This causes Load/Hit/Store on VMX targets
536XMFINLINE UINT XMVectorGetIntY(FXMVECTOR V)
537{
538#if defined(_XM_NO_INTRINSICS_)
539    return V.vector4_u32[1];
540#elif defined(_XM_SSE_INTRINSICS_)
541    __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(1,1,1,1));
542    return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
543#else // _XM_VMX128_INTRINSICS_
544#endif // _XM_VMX128_INTRINSICS_
545}
546
547// Return the Z component in an integer register.
548// This causes Load/Hit/Store on VMX targets
549XMFINLINE UINT XMVectorGetIntZ(FXMVECTOR V)
550{
551#if defined(_XM_NO_INTRINSICS_)
552    return V.vector4_u32[2];
553#elif defined(_XM_SSE_INTRINSICS_)
554    __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(2,2,2,2));
555    return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
556#else // _XM_VMX128_INTRINSICS_
557#endif // _XM_VMX128_INTRINSICS_
558}
559
560// Return the W component in an integer register.
561// This causes Load/Hit/Store on VMX targets
562XMFINLINE UINT XMVectorGetIntW(FXMVECTOR V)
563{
564#if defined(_XM_NO_INTRINSICS_)
565    return V.vector4_u32[3];
566#elif defined(_XM_SSE_INTRINSICS_)
567    __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(3,3,3,3));
568    return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
569#else // _XM_VMX128_INTRINSICS_
570#endif // _XM_VMX128_INTRINSICS_
571}
572
573//------------------------------------------------------------------------------
574
575// Store a component indexed by i into a 32 bit integer location in memory.
576// This causes Load/Hit/Store on VMX targets
577XMFINLINE VOID XMVectorGetIntByIndexPtr(UINT *x,FXMVECTOR V,UINT i)
578{
579    XMASSERT( x != 0 );
580    XMASSERT( i <  4 );
581#if defined(_XM_NO_INTRINSICS_)
582    *x = V.vector4_u32[i];
583#elif defined(_XM_SSE_INTRINSICS_)
584#if defined(_MSC_VER) && (_MSC_VER<1400)
585    XMVECTORU32 tmp;
586    tmp.v = V;
587    *x = tmp.u[i];
588#else
589    *x = V.m128_u32[i];
590#endif
591#else // _XM_VMX128_INTRINSICS_
592#endif // _XM_VMX128_INTRINSICS_
593}
594
595//------------------------------------------------------------------------------
596
597// Store the X component into a 32 bit integer location in memory.
598XMFINLINE VOID XMVectorGetIntXPtr(UINT *x,FXMVECTOR V)
599{
600    XMASSERT( x != 0 );
601#if defined(_XM_NO_INTRINSICS_)
602    *x = V.vector4_u32[0];
603#elif defined(_XM_SSE_INTRINSICS_)
604    _mm_store_ss(reinterpret_cast<float *>(x),V);
605#else // _XM_VMX128_INTRINSICS_
606#endif // _XM_VMX128_INTRINSICS_
607}
608
609// Store the Y component into a 32 bit integer location in memory.
610XMFINLINE VOID XMVectorGetIntYPtr(UINT *y,FXMVECTOR V)
611{
612    XMASSERT( y != 0 );
613#if defined(_XM_NO_INTRINSICS_)
614    *y = V.vector4_u32[1];
615#elif defined(_XM_SSE_INTRINSICS_)
616    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
617    _mm_store_ss(reinterpret_cast<float *>(y),vResult);
618#else // _XM_VMX128_INTRINSICS_
619#endif // _XM_VMX128_INTRINSICS_
620}
621
622// Store the Z component into a 32 bit integer locaCantion in memory.
623XMFINLINE VOID XMVectorGetIntZPtr(UINT *z,FXMVECTOR V)
624{
625    XMASSERT( z != 0 );
626#if defined(_XM_NO_INTRINSICS_)
627    *z = V.vector4_u32[2];
628#elif defined(_XM_SSE_INTRINSICS_)
629    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
630    _mm_store_ss(reinterpret_cast<float *>(z),vResult);
631#else // _XM_VMX128_INTRINSICS_
632#endif // _XM_VMX128_INTRINSICS_
633}
634
635// Store the W component into a 32 bit integer location in memory.
636XMFINLINE VOID XMVectorGetIntWPtr(UINT *w,FXMVECTOR V)
637{
638    XMASSERT( w != 0 );
639#if defined(_XM_NO_INTRINSICS_)
640    *w = V.vector4_u32[3];
641#elif defined(_XM_SSE_INTRINSICS_)
642    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
643    _mm_store_ss(reinterpret_cast<float *>(w),vResult);
644#else // _XM_VMX128_INTRINSICS_
645#endif // _XM_VMX128_INTRINSICS_
646}
647
648//------------------------------------------------------------------------------
649
650// Set a single indexed floating point component
651// This causes Load/Hit/Store on VMX targets
652XMFINLINE XMVECTOR XMVectorSetByIndex(FXMVECTOR V, FLOAT f,UINT i)
653{
654#if defined(_XM_NO_INTRINSICS_)
655    XMVECTOR U;
656    XMASSERT( i <= 3 );
657    U = V;
658    U.vector4_f32[i] = f;
659    return U;
660#elif defined(_XM_SSE_INTRINSICS_)
661    XMASSERT( i <= 3 );
662    XMVECTOR U = V;
663    U.m128_f32[i] = f;
664    return U;
665#else // _XM_VMX128_INTRINSICS_
666#endif // _XM_VMX128_INTRINSICS_
667}
668
669//------------------------------------------------------------------------------
670
671// Sets the X component of a vector to a passed floating point value
672// This causes Load/Hit/Store on VMX targets
673XMFINLINE XMVECTOR XMVectorSetX(FXMVECTOR V, FLOAT x)
674{
675#if defined(_XM_NO_INTRINSICS_)
676    XMVECTOR U;
677    U.vector4_f32[0] = x;
678    U.vector4_f32[1] = V.vector4_f32[1];
679    U.vector4_f32[2] = V.vector4_f32[2];
680    U.vector4_f32[3] = V.vector4_f32[3];
681    return U;
682#elif defined(_XM_SSE_INTRINSICS_)
683#if defined(_XM_ISVS2005_)
684    XMVECTOR vResult = V;
685    vResult.m128_f32[0] = x;
686    return vResult;
687#else
688    XMVECTOR vResult = _mm_set_ss(x);
689    vResult = _mm_move_ss(V,vResult);
690    return vResult;
691#endif // _XM_ISVS2005_
692#else // _XM_VMX128_INTRINSICS_
693#endif // _XM_VMX128_INTRINSICS_
694}
695
696// Sets the Y component of a vector to a passed floating point value
697// This causes Load/Hit/Store on VMX targets
698XMFINLINE XMVECTOR XMVectorSetY(FXMVECTOR V, FLOAT y)
699{
700#if defined(_XM_NO_INTRINSICS_)
701    XMVECTOR U;
702    U.vector4_f32[0] = V.vector4_f32[0];
703    U.vector4_f32[1] = y;
704    U.vector4_f32[2] = V.vector4_f32[2];
705    U.vector4_f32[3] = V.vector4_f32[3];
706    return U;
707#elif defined(_XM_SSE_INTRINSICS_)
708#if defined(_XM_ISVS2005_)
709    XMVECTOR vResult = V;
710    vResult.m128_f32[1] = y;
711    return vResult;
712#else
713    // Swap y and x
714    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
715    // Convert input to vector
716    XMVECTOR vTemp = _mm_set_ss(y);
717    // Replace the x component
718    vResult = _mm_move_ss(vResult,vTemp);
719    // Swap y and x again
720    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
721    return vResult;
722#endif // _XM_ISVS2005_
723#else // _XM_VMX128_INTRINSICS_
724#endif // _XM_VMX128_INTRINSICS_
725}
726// Sets the Z component of a vector to a passed floating point value
727// This causes Load/Hit/Store on VMX targets
728XMFINLINE XMVECTOR XMVectorSetZ(FXMVECTOR V, FLOAT z)
729{
730#if defined(_XM_NO_INTRINSICS_)
731    XMVECTOR U;
732    U.vector4_f32[0] = V.vector4_f32[0];
733    U.vector4_f32[1] = V.vector4_f32[1];
734    U.vector4_f32[2] = z;
735    U.vector4_f32[3] = V.vector4_f32[3];
736    return U;
737#elif defined(_XM_SSE_INTRINSICS_)
738#if defined(_XM_ISVS2005_)
739    XMVECTOR vResult = V;
740    vResult.m128_f32[2] = z;
741    return vResult;
742#else
743    // Swap z and x
744    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
745    // Convert input to vector
746    XMVECTOR vTemp = _mm_set_ss(z);
747    // Replace the x component
748    vResult = _mm_move_ss(vResult,vTemp);
749    // Swap z and x again
750    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
751    return vResult;
752#endif // _XM_ISVS2005_
753#else // _XM_VMX128_INTRINSICS_
754#endif // _XM_VMX128_INTRINSICS_
755}
756
757// Sets the W component of a vector to a passed floating point value
758// This causes Load/Hit/Store on VMX targets
759XMFINLINE XMVECTOR XMVectorSetW(FXMVECTOR V, FLOAT w)
760{
761#if defined(_XM_NO_INTRINSICS_)
762    XMVECTOR U;
763    U.vector4_f32[0] = V.vector4_f32[0];
764    U.vector4_f32[1] = V.vector4_f32[1];
765    U.vector4_f32[2] = V.vector4_f32[2];
766    U.vector4_f32[3] = w;
767    return U;
768#elif defined(_XM_SSE_INTRINSICS_)
769#if defined(_XM_ISVS2005_)
770    XMVECTOR vResult = V;
771    vResult.m128_f32[3] = w;
772    return vResult;
773#else
774    // Swap w and x
775    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
776    // Convert input to vector
777    XMVECTOR vTemp = _mm_set_ss(w);
778    // Replace the x component
779    vResult = _mm_move_ss(vResult,vTemp);
780    // Swap w and x again
781    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
782    return vResult;
783#endif // _XM_ISVS2005_
784#else // _XM_VMX128_INTRINSICS_
785#endif // _XM_VMX128_INTRINSICS_
786}
787
788//------------------------------------------------------------------------------
789
790// Sets a component of a vector to a floating point value passed by pointer
791// This causes Load/Hit/Store on VMX targets
792XMFINLINE XMVECTOR XMVectorSetByIndexPtr(FXMVECTOR V,CONST FLOAT *f,UINT i)
793{
794#if defined(_XM_NO_INTRINSICS_)
795    XMVECTOR U;
796    XMASSERT( f != 0 );
797    XMASSERT( i <= 3 );
798    U = V;
799    U.vector4_f32[i] = *f;
800    return U;
801#elif defined(_XM_SSE_INTRINSICS_)
802    XMASSERT( f != 0 );
803    XMASSERT( i <= 3 );
804    XMVECTOR U = V;
805    U.m128_f32[i] = *f;
806    return U;
807#else // _XM_VMX128_INTRINSICS_
808#endif // _XM_VMX128_INTRINSICS_
809}
810
811//------------------------------------------------------------------------------
812
813// Sets the X component of a vector to a floating point value passed by pointer
814XMFINLINE XMVECTOR XMVectorSetXPtr(FXMVECTOR V,CONST FLOAT *x)
815{
816#if defined(_XM_NO_INTRINSICS_)
817    XMVECTOR U;
818    XMASSERT( x != 0 );
819    U.vector4_f32[0] = *x;
820    U.vector4_f32[1] = V.vector4_f32[1];
821    U.vector4_f32[2] = V.vector4_f32[2];
822    U.vector4_f32[3] = V.vector4_f32[3];
823    return U;
824#elif defined(_XM_SSE_INTRINSICS_)
825    XMASSERT( x != 0 );
826    XMVECTOR vResult = _mm_load_ss(x);
827    vResult = _mm_move_ss(V,vResult);
828    return vResult;
829#else // _XM_VMX128_INTRINSICS_
830#endif // _XM_VMX128_INTRINSICS_
831}
832
833// Sets the Y component of a vector to a floating point value passed by pointer
834XMFINLINE XMVECTOR XMVectorSetYPtr(FXMVECTOR V,CONST FLOAT *y)
835{
836#if defined(_XM_NO_INTRINSICS_)
837    XMVECTOR U;
838    XMASSERT( y != 0 );
839    U.vector4_f32[0] = V.vector4_f32[0];
840    U.vector4_f32[1] = *y;
841    U.vector4_f32[2] = V.vector4_f32[2];
842    U.vector4_f32[3] = V.vector4_f32[3];
843    return U;
844#elif defined(_XM_SSE_INTRINSICS_)
845    XMASSERT( y != 0 );
846    // Swap y and x
847    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
848    // Convert input to vector
849    XMVECTOR vTemp = _mm_load_ss(y);
850    // Replace the x component
851    vResult = _mm_move_ss(vResult,vTemp);
852    // Swap y and x again
853    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
854    return vResult;
855#else // _XM_VMX128_INTRINSICS_
856#endif // _XM_VMX128_INTRINSICS_
857}
858
859// Sets the Z component of a vector to a floating point value passed by pointer
860XMFINLINE XMVECTOR XMVectorSetZPtr(FXMVECTOR V,CONST FLOAT *z)
861{
862#if defined(_XM_NO_INTRINSICS_)
863    XMVECTOR U;
864    XMASSERT( z != 0 );
865    U.vector4_f32[0] = V.vector4_f32[0];
866    U.vector4_f32[1] = V.vector4_f32[1];
867    U.vector4_f32[2] = *z;
868    U.vector4_f32[3] = V.vector4_f32[3];
869    return U;
870#elif defined(_XM_SSE_INTRINSICS_)
871    XMASSERT( z != 0 );
872    // Swap z and x
873    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
874    // Convert input to vector
875    XMVECTOR vTemp = _mm_load_ss(z);
876    // Replace the x component
877    vResult = _mm_move_ss(vResult,vTemp);
878    // Swap z and x again
879    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
880    return vResult;
881#else // _XM_VMX128_INTRINSICS_
882#endif // _XM_VMX128_INTRINSICS_
883}
884
885// Sets the W component of a vector to a floating point value passed by pointer
886XMFINLINE XMVECTOR XMVectorSetWPtr(FXMVECTOR V,CONST FLOAT *w)
887{
888#if defined(_XM_NO_INTRINSICS_)
889    XMVECTOR U;
890    XMASSERT( w != 0 );
891    U.vector4_f32[0] = V.vector4_f32[0];
892    U.vector4_f32[1] = V.vector4_f32[1];
893    U.vector4_f32[2] = V.vector4_f32[2];
894    U.vector4_f32[3] = *w;
895    return U;
896#elif defined(_XM_SSE_INTRINSICS_)
897    XMASSERT( w != 0 );
898    // Swap w and x
899    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
900    // Convert input to vector
901    XMVECTOR vTemp = _mm_load_ss(w);
902    // Replace the x component
903    vResult = _mm_move_ss(vResult,vTemp);
904    // Swap w and x again
905    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
906    return vResult;
907#else // _XM_VMX128_INTRINSICS_
908#endif // _XM_VMX128_INTRINSICS_
909}
910
911//------------------------------------------------------------------------------
912
913// Sets a component of a vector to an integer passed by value
914// This causes Load/Hit/Store on VMX targets
915XMFINLINE XMVECTOR XMVectorSetIntByIndex(FXMVECTOR V, UINT x, UINT i)
916{
917#if defined(_XM_NO_INTRINSICS_)
918    XMVECTOR U;
919    XMASSERT( i <= 3 );
920    U = V;
921    U.vector4_u32[i] = x;
922    return U;
923#elif defined(_XM_SSE_INTRINSICS_)
924    XMASSERT( i <= 3 );
925    XMVECTORU32 tmp;
926    tmp.v = V;
927    tmp.u[i] = x;
928    return tmp;
929#else // _XM_VMX128_INTRINSICS_
930#endif // _XM_VMX128_INTRINSICS_
931}
932
933//------------------------------------------------------------------------------
934
935// Sets the X component of a vector to an integer passed by value
936// This causes Load/Hit/Store on VMX targets
937XMFINLINE XMVECTOR XMVectorSetIntX(FXMVECTOR V, UINT x)
938{
939#if defined(_XM_NO_INTRINSICS_)
940    XMVECTOR U;
941    U.vector4_u32[0] = x;
942    U.vector4_u32[1] = V.vector4_u32[1];
943    U.vector4_u32[2] = V.vector4_u32[2];
944    U.vector4_u32[3] = V.vector4_u32[3];
945    return U;
946#elif defined(_XM_SSE_INTRINSICS_)
947#if defined(_XM_ISVS2005_)
948    XMVECTOR vResult = V;
949    vResult.m128_i32[0] = x;
950    return vResult;
951#else
952    __m128i vTemp = _mm_cvtsi32_si128(x);
953    XMVECTOR vResult = _mm_move_ss(V,reinterpret_cast<const __m128 *>(&vTemp)[0]);
954    return vResult;
955#endif // _XM_ISVS2005_
956#else // _XM_VMX128_INTRINSICS_
957#endif // _XM_VMX128_INTRINSICS_
958}
959
960// Sets the Y component of a vector to an integer passed by value
961// This causes Load/Hit/Store on VMX targets
962XMFINLINE XMVECTOR XMVectorSetIntY(FXMVECTOR V, UINT y)
963{
964#if defined(_XM_NO_INTRINSICS_)
965    XMVECTOR U;
966    U.vector4_u32[0] = V.vector4_u32[0];
967    U.vector4_u32[1] = y;
968    U.vector4_u32[2] = V.vector4_u32[2];
969    U.vector4_u32[3] = V.vector4_u32[3];
970    return U;
971#elif defined(_XM_SSE_INTRINSICS_)
972#if defined(_XM_ISVS2005_)
973    XMVECTOR vResult = V;
974    vResult.m128_i32[1] = y;
975    return vResult;
976#else    // Swap y and x
977    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
978    // Convert input to vector
979    __m128i vTemp = _mm_cvtsi32_si128(y);
980    // Replace the x component
981    vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
982    // Swap y and x again
983    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
984    return vResult;
985#endif // _XM_ISVS2005_
986#else // _XM_VMX128_INTRINSICS_
987#endif // _XM_VMX128_INTRINSICS_
988}
989
990// Sets the Z component of a vector to an integer passed by value
991// This causes Load/Hit/Store on VMX targets
992XMFINLINE XMVECTOR XMVectorSetIntZ(FXMVECTOR V, UINT z)
993{
994#if defined(_XM_NO_INTRINSICS_)
995    XMVECTOR U;
996    U.vector4_u32[0] = V.vector4_u32[0];
997    U.vector4_u32[1] = V.vector4_u32[1];
998    U.vector4_u32[2] = z;
999    U.vector4_u32[3] = V.vector4_u32[3];
1000    return U;
1001#elif defined(_XM_SSE_INTRINSICS_)
1002#if defined(_XM_ISVS2005_)
1003    XMVECTOR vResult = V;
1004    vResult.m128_i32[2] = z;
1005    return vResult;
1006#else
1007    // Swap z and x
1008    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
1009    // Convert input to vector
1010    __m128i vTemp = _mm_cvtsi32_si128(z);
1011    // Replace the x component
1012    vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
1013    // Swap z and x again
1014    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
1015    return vResult;
1016#endif // _XM_ISVS2005_
1017#else // _XM_VMX128_INTRINSICS_
1018#endif // _XM_VMX128_INTRINSICS_
1019}
1020
1021// Sets the W component of a vector to an integer passed by value
1022// This causes Load/Hit/Store on VMX targets
1023XMFINLINE XMVECTOR XMVectorSetIntW(FXMVECTOR V, UINT w)
1024{
1025#if defined(_XM_NO_INTRINSICS_)
1026    XMVECTOR U;
1027    U.vector4_u32[0] = V.vector4_u32[0];
1028    U.vector4_u32[1] = V.vector4_u32[1];
1029    U.vector4_u32[2] = V.vector4_u32[2];
1030    U.vector4_u32[3] = w;
1031    return U;
1032#elif defined(_XM_SSE_INTRINSICS_)
1033#if defined(_XM_ISVS2005_)
1034    XMVECTOR vResult = V;
1035    vResult.m128_i32[3] = w;
1036    return vResult;
1037#else
1038    // Swap w and x
1039    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
1040    // Convert input to vector
1041    __m128i vTemp = _mm_cvtsi32_si128(w);
1042    // Replace the x component
1043    vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
1044    // Swap w and x again
1045    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
1046    return vResult;
1047#endif // _XM_ISVS2005_
1048#else // _XM_VMX128_INTRINSICS_
1049#endif // _XM_VMX128_INTRINSICS_
1050}
1051
1052//------------------------------------------------------------------------------
1053
1054// Sets a component of a vector to an integer value passed by pointer
1055// This causes Load/Hit/Store on VMX targets
1056XMFINLINE XMVECTOR XMVectorSetIntByIndexPtr(FXMVECTOR V, CONST UINT *x,UINT i)
1057{
1058#if defined(_XM_NO_INTRINSICS_)
1059    XMVECTOR U;
1060    XMASSERT( x != 0 );
1061    XMASSERT( i <= 3 );
1062    U = V;
1063    U.vector4_u32[i] = *x;
1064    return U;
1065#elif defined(_XM_SSE_INTRINSICS_)
1066    XMASSERT( x != 0 );
1067    XMASSERT( i <= 3 );
1068    XMVECTORU32 tmp;
1069    tmp.v = V;
1070    tmp.u[i] = *x;
1071    return tmp;
1072#else // _XM_VMX128_INTRINSICS_
1073#endif // _XM_VMX128_INTRINSICS_
1074}
1075
1076//------------------------------------------------------------------------------
1077
1078// Sets the X component of a vector to an integer value passed by pointer
1079XMFINLINE XMVECTOR XMVectorSetIntXPtr(FXMVECTOR V,CONST UINT *x)
1080{
1081#if defined(_XM_NO_INTRINSICS_)
1082    XMVECTOR U;
1083    XMASSERT( x != 0 );
1084    U.vector4_u32[0] = *x;
1085    U.vector4_u32[1] = V.vector4_u32[1];
1086    U.vector4_u32[2] = V.vector4_u32[2];
1087    U.vector4_u32[3] = V.vector4_u32[3];
1088    return U;
1089#elif defined(_XM_SSE_INTRINSICS_)
1090    XMASSERT( x != 0 );
1091    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(x));
1092    XMVECTOR vResult = _mm_move_ss(V,vTemp);
1093    return vResult;
1094#else // _XM_VMX128_INTRINSICS_
1095#endif // _XM_VMX128_INTRINSICS_
1096}
1097
1098// Sets the Y component of a vector to an integer value passed by pointer
1099XMFINLINE XMVECTOR XMVectorSetIntYPtr(FXMVECTOR V,CONST UINT *y)
1100{
1101#if defined(_XM_NO_INTRINSICS_)
1102    XMVECTOR U;
1103    XMASSERT( y != 0 );
1104    U.vector4_u32[0] = V.vector4_u32[0];
1105    U.vector4_u32[1] = *y;
1106    U.vector4_u32[2] = V.vector4_u32[2];
1107    U.vector4_u32[3] = V.vector4_u32[3];
1108    return U;
1109#elif defined(_XM_SSE_INTRINSICS_)
1110    XMASSERT( y != 0 );
1111    // Swap y and x
1112    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
1113    // Convert input to vector
1114    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(y));
1115    // Replace the x component
1116    vResult = _mm_move_ss(vResult,vTemp);
1117    // Swap y and x again
1118    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
1119    return vResult;
1120#else // _XM_VMX128_INTRINSICS_
1121#endif // _XM_VMX128_INTRINSICS_
1122}
1123
1124// Sets the Z component of a vector to an integer value passed by pointer
1125XMFINLINE XMVECTOR XMVectorSetIntZPtr(FXMVECTOR V,CONST UINT *z)
1126{
1127#if defined(_XM_NO_INTRINSICS_)
1128    XMVECTOR U;
1129    XMASSERT( z != 0 );
1130    U.vector4_u32[0] = V.vector4_u32[0];
1131    U.vector4_u32[1] = V.vector4_u32[1];
1132    U.vector4_u32[2] = *z;
1133    U.vector4_u32[3] = V.vector4_u32[3];
1134    return U;
1135#elif defined(_XM_SSE_INTRINSICS_)
1136    XMASSERT( z != 0 );
1137    // Swap z and x
1138    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
1139    // Convert input to vector
1140    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(z));
1141    // Replace the x component
1142    vResult = _mm_move_ss(vResult,vTemp);
1143    // Swap z and x again
1144    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
1145    return vResult;
1146#else // _XM_VMX128_INTRINSICS_
1147#endif // _XM_VMX128_INTRINSICS_
1148}
1149
1150// Sets the W component of a vector to an integer value passed by pointer
1151XMFINLINE XMVECTOR XMVectorSetIntWPtr(FXMVECTOR V,CONST UINT *w)
1152{
1153#if defined(_XM_NO_INTRINSICS_)
1154    XMVECTOR U;
1155    XMASSERT( w != 0 );
1156    U.vector4_u32[0] = V.vector4_u32[0];
1157    U.vector4_u32[1] = V.vector4_u32[1];
1158    U.vector4_u32[2] = V.vector4_u32[2];
1159    U.vector4_u32[3] = *w;
1160    return U;
1161#elif defined(_XM_SSE_INTRINSICS_)
1162    XMASSERT( w != 0 );
1163    // Swap w and x
1164    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
1165    // Convert input to vector
1166    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(w));
1167    // Replace the x component
1168    vResult = _mm_move_ss(vResult,vTemp);
1169    // Swap w and x again
1170    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
1171    return vResult;
1172#else // _XM_VMX128_INTRINSICS_
1173#endif // _XM_VMX128_INTRINSICS_
1174}
1175
1176//------------------------------------------------------------------------------
1177// Define a control vector to be used in XMVectorPermute
1178// operations.  Visualize the two vectors V1 and V2 given
1179// in a permute as arranged back to back in a linear fashion,
1180// such that they form an array of 8 floating point values.
1181// The four integers specified in XMVectorPermuteControl
1182// will serve as indices into the array to select components
1183// from the two vectors.  ElementIndex0 is used to select
1184// an element from the vectors to be placed in the first
1185// component of the resulting vector, ElementIndex1 is used
1186// to select an element for the second component, etc.
1187
1188XMFINLINE XMVECTOR XMVectorPermuteControl
1189(
1190    UINT     ElementIndex0,
1191    UINT     ElementIndex1,
1192    UINT     ElementIndex2,
1193    UINT     ElementIndex3
1194)
1195{
1196#if defined(_XM_SSE_INTRINSICS_) || defined(_XM_NO_INTRINSICS_)
1197    XMVECTORU32 vControl;
1198    static CONST UINT ControlElement[] = {
1199                    XM_PERMUTE_0X,
1200                    XM_PERMUTE_0Y,
1201                    XM_PERMUTE_0Z,
1202                    XM_PERMUTE_0W,
1203                    XM_PERMUTE_1X,
1204                    XM_PERMUTE_1Y,
1205                    XM_PERMUTE_1Z,
1206                    XM_PERMUTE_1W
1207                };
1208    XMASSERT(ElementIndex0 < 8);
1209    XMASSERT(ElementIndex1 < 8);
1210    XMASSERT(ElementIndex2 < 8);
1211    XMASSERT(ElementIndex3 < 8);
1212
1213    vControl.u[0] = ControlElement[ElementIndex0];
1214    vControl.u[1] = ControlElement[ElementIndex1];
1215    vControl.u[2] = ControlElement[ElementIndex2];
1216    vControl.u[3] = ControlElement[ElementIndex3];
1217    return vControl.v;
1218#else
1219#endif
1220}
1221
1222//------------------------------------------------------------------------------
1223
1224// Using a control vector made up of 16 bytes from 0-31, remap V1 and V2's byte
1225// entries into a single 16 byte vector and return it. Index 0-15 = V1,
1226// 16-31 = V2
1227XMFINLINE XMVECTOR XMVectorPermute
1228(
1229    FXMVECTOR V1,
1230    FXMVECTOR V2,
1231    FXMVECTOR Control
1232)
1233{
1234#if defined(_XM_NO_INTRINSICS_)
1235    const BYTE *aByte[2];
1236    XMVECTOR Result;
1237    UINT i, uIndex, VectorIndex;
1238    const BYTE *pControl;
1239    BYTE *pWork;
1240
1241    // Indices must be in range from 0 to 31
1242    XMASSERT((Control.vector4_u32[0] & 0xE0E0E0E0) == 0);
1243    XMASSERT((Control.vector4_u32[1] & 0xE0E0E0E0) == 0);
1244    XMASSERT((Control.vector4_u32[2] & 0xE0E0E0E0) == 0);
1245    XMASSERT((Control.vector4_u32[3] & 0xE0E0E0E0) == 0);
1246
1247    // 0-15 = V1, 16-31 = V2
1248    aByte[0] = (const BYTE*)(&V1);
1249    aByte[1] = (const BYTE*)(&V2);
1250    i = 16;
1251    pControl = (const BYTE *)(&Control);
1252    pWork = (BYTE *)(&Result);
1253    do {
1254        // Get the byte to map from
1255        uIndex = pControl[0];
1256        ++pControl;
1257        VectorIndex = (uIndex>>4)&1;
1258        uIndex &= 0x0F;
1259#if defined(_XM_LITTLEENDIAN_)
1260        uIndex ^= 3; // Swap byte ordering on little endian machines
1261#endif
1262        pWork[0] = aByte[VectorIndex][uIndex];
1263        ++pWork;
1264    } while (--i);
1265    return Result;
1266#elif defined(_XM_SSE_INTRINSICS_)
1267#if defined(_PREFAST_) || defined(XMDEBUG)
1268    // Indices must be in range from 0 to 31
1269    static const XMVECTORI32 PremuteTest = {0xE0E0E0E0,0xE0E0E0E0,0xE0E0E0E0,0xE0E0E0E0};
1270    XMVECTOR vAssert = _mm_and_ps(Control,PremuteTest);
1271    __m128i vAsserti = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&vAssert)[0],g_XMZero);
1272    XMASSERT(_mm_movemask_ps(*reinterpret_cast<const __m128 *>(&vAsserti)) == 0xf);
1273#endif
1274    // Store the vectors onto local memory on the stack
1275    XMVECTOR Array[2];
1276    Array[0] = V1;
1277    Array[1] = V2;
1278    // Output vector, on the stack
1279    XMVECTORU8 vResult;
1280    // Get pointer to the two vectors on the stack
1281    const BYTE *pInput = reinterpret_cast<const BYTE *>(Array);
1282    // Store the Control vector on the stack to access the bytes
1283    // don't use Control, it can cause a register variable to spill on the stack.
1284    XMVECTORU8 vControl;
1285    vControl.v = Control;   // Write to memory
1286    UINT i = 0;
1287    do {
1288        UINT ComponentIndex = vControl.u[i] & 0x1FU;
1289        ComponentIndex ^= 3; // Swap byte ordering
1290        vResult.u[i] = pInput[ComponentIndex];
1291    } while (++i<16);
1292    return vResult;
1293#else // _XM_SSE_INTRINSICS_
1294#endif // _XM_VMX128_INTRINSICS_
1295}
1296
1297//------------------------------------------------------------------------------
1298// Define a control vector to be used in XMVectorSelect
1299// operations.  The four integers specified in XMVectorSelectControl
1300// serve as indices to select between components in two vectors.
1301// The first index controls selection for the first component of
1302// the vectors involved in a select operation, the second index
1303// controls selection for the second component etc.  A value of
1304// zero for an index causes the corresponding component from the first
1305// vector to be selected whereas a one causes the component from the
1306// second vector to be selected instead.
1307
1308XMFINLINE XMVECTOR XMVectorSelectControl
1309(
1310    UINT VectorIndex0,
1311    UINT VectorIndex1,
1312    UINT VectorIndex2,
1313    UINT VectorIndex3
1314)
1315{
1316#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
1317    // x=Index0,y=Index1,z=Index2,w=Index3
1318    __m128i vTemp = _mm_set_epi32(VectorIndex3,VectorIndex2,VectorIndex1,VectorIndex0);
1319    // Any non-zero entries become 0xFFFFFFFF else 0
1320    vTemp = _mm_cmpgt_epi32(vTemp,g_XMZero);
1321	return reinterpret_cast<__m128 *>(&vTemp)[0];
1322#else
1323    XMVECTOR    ControlVector;
1324    CONST UINT  ControlElement[] =
1325                {
1326                    XM_SELECT_0,
1327                    XM_SELECT_1
1328                };
1329
1330    XMASSERT(VectorIndex0 < 2);
1331    XMASSERT(VectorIndex1 < 2);
1332    XMASSERT(VectorIndex2 < 2);
1333    XMASSERT(VectorIndex3 < 2);
1334
1335    ControlVector.vector4_u32[0] = ControlElement[VectorIndex0];
1336    ControlVector.vector4_u32[1] = ControlElement[VectorIndex1];
1337    ControlVector.vector4_u32[2] = ControlElement[VectorIndex2];
1338    ControlVector.vector4_u32[3] = ControlElement[VectorIndex3];
1339
1340    return ControlVector;
1341
1342#endif
1343}
1344
1345//------------------------------------------------------------------------------
1346
1347XMFINLINE XMVECTOR XMVectorSelect
1348(
1349    FXMVECTOR V1,
1350    FXMVECTOR V2,
1351    FXMVECTOR Control
1352)
1353{
1354#if defined(_XM_NO_INTRINSICS_)
1355
1356    XMVECTOR Result;
1357
1358    Result.vector4_u32[0] = (V1.vector4_u32[0] & ~Control.vector4_u32[0]) | (V2.vector4_u32[0] & Control.vector4_u32[0]);
1359    Result.vector4_u32[1] = (V1.vector4_u32[1] & ~Control.vector4_u32[1]) | (V2.vector4_u32[1] & Control.vector4_u32[1]);
1360    Result.vector4_u32[2] = (V1.vector4_u32[2] & ~Control.vector4_u32[2]) | (V2.vector4_u32[2] & Control.vector4_u32[2]);
1361    Result.vector4_u32[3] = (V1.vector4_u32[3] & ~Control.vector4_u32[3]) | (V2.vector4_u32[3] & Control.vector4_u32[3]);
1362
1363    return Result;
1364
1365#elif defined(_XM_SSE_INTRINSICS_)
1366	XMVECTOR vTemp1 = _mm_andnot_ps(Control,V1);
1367    XMVECTOR vTemp2 = _mm_and_ps(V2,Control);
1368    return _mm_or_ps(vTemp1,vTemp2);
1369#else // _XM_VMX128_INTRINSICS_
1370#endif // _XM_VMX128_INTRINSICS_
1371}
1372
1373//------------------------------------------------------------------------------
1374
1375XMFINLINE XMVECTOR XMVectorMergeXY
1376(
1377    FXMVECTOR V1,
1378    FXMVECTOR V2
1379)
1380{
1381#if defined(_XM_NO_INTRINSICS_)
1382
1383    XMVECTOR Result;
1384
1385    Result.vector4_u32[0] = V1.vector4_u32[0];
1386    Result.vector4_u32[1] = V2.vector4_u32[0];
1387    Result.vector4_u32[2] = V1.vector4_u32[1];
1388    Result.vector4_u32[3] = V2.vector4_u32[1];
1389
1390    return Result;
1391
1392#elif defined(_XM_SSE_INTRINSICS_)
1393	return _mm_unpacklo_ps( V1, V2 );
1394#else // _XM_VMX128_INTRINSICS_
1395#endif // _XM_VMX128_INTRINSICS_
1396}
1397
1398//------------------------------------------------------------------------------
1399
1400XMFINLINE XMVECTOR XMVectorMergeZW
1401(
1402    FXMVECTOR V1,
1403    FXMVECTOR V2
1404)
1405{
1406#if defined(_XM_NO_INTRINSICS_)
1407
1408    XMVECTOR Result;
1409
1410    Result.vector4_u32[0] = V1.vector4_u32[2];
1411    Result.vector4_u32[1] = V2.vector4_u32[2];
1412    Result.vector4_u32[2] = V1.vector4_u32[3];
1413    Result.vector4_u32[3] = V2.vector4_u32[3];
1414
1415    return Result;
1416
1417#elif defined(_XM_SSE_INTRINSICS_)
1418	return _mm_unpackhi_ps( V1, V2 );
1419#else // _XM_VMX128_INTRINSICS_
1420#endif // _XM_VMX128_INTRINSICS_
1421}
1422
1423//------------------------------------------------------------------------------
1424// Comparison operations
1425//------------------------------------------------------------------------------
1426
1427//------------------------------------------------------------------------------
1428
1429XMFINLINE XMVECTOR XMVectorEqual
1430(
1431    FXMVECTOR V1,
1432    FXMVECTOR V2
1433)
1434{
1435#if defined(_XM_NO_INTRINSICS_)
1436
1437    XMVECTOR Control;
1438
1439    Control.vector4_u32[0] = (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
1440    Control.vector4_u32[1] = (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
1441    Control.vector4_u32[2] = (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
1442    Control.vector4_u32[3] = (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
1443
1444    return Control;
1445
1446#elif defined(_XM_SSE_INTRINSICS_)
1447	return _mm_cmpeq_ps( V1, V2 );
1448#else // _XM_VMX128_INTRINSICS_
1449#endif // _XM_VMX128_INTRINSICS_
1450}
1451
1452//------------------------------------------------------------------------------
1453
1454XMFINLINE XMVECTOR XMVectorEqualR
1455(
1456    UINT*    pCR,
1457    FXMVECTOR V1,
1458    FXMVECTOR V2
1459)
1460{
1461#if defined(_XM_NO_INTRINSICS_)
1462    UINT ux, uy, uz, uw, CR;
1463    XMVECTOR Control;
1464
1465    XMASSERT( pCR );
1466
1467    ux = (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
1468    uy = (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
1469    uz = (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
1470    uw = (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
1471    CR = 0;
1472    if (ux&uy&uz&uw)
1473    {
1474        // All elements are greater
1475        CR = XM_CRMASK_CR6TRUE;
1476    }
1477    else if (!(ux|uy|uz|uw))
1478    {
1479        // All elements are not greater
1480        CR = XM_CRMASK_CR6FALSE;
1481    }
1482    *pCR = CR;
1483    Control.vector4_u32[0] = ux;
1484    Control.vector4_u32[1] = uy;
1485    Control.vector4_u32[2] = uz;
1486    Control.vector4_u32[3] = uw;
1487    return Control;
1488
1489#elif defined(_XM_SSE_INTRINSICS_)
1490    XMASSERT( pCR );
1491    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
1492    UINT CR = 0;
1493    int iTest = _mm_movemask_ps(vTemp);
1494    if (iTest==0xf)
1495    {
1496        CR = XM_CRMASK_CR6TRUE;
1497    }
1498    else if (!iTest)
1499    {
1500        // All elements are not greater
1501        CR = XM_CRMASK_CR6FALSE;
1502    }
1503    *pCR = CR;
1504    return vTemp;
1505#else // _XM_VMX128_INTRINSICS_
1506#endif // _XM_VMX128_INTRINSICS_
1507}
1508
1509//------------------------------------------------------------------------------
1510// Treat the components of the vectors as unsigned integers and
1511// compare individual bits between the two.  This is useful for
1512// comparing control vectors and result vectors returned from
1513// other comparison operations.
1514
1515XMFINLINE XMVECTOR XMVectorEqualInt
1516(
1517    FXMVECTOR V1,
1518    FXMVECTOR V2
1519)
1520{
1521#if defined(_XM_NO_INTRINSICS_)
1522
1523    XMVECTOR Control;
1524
1525    Control.vector4_u32[0] = (V1.vector4_u32[0] == V2.vector4_u32[0]) ? 0xFFFFFFFF : 0;
1526    Control.vector4_u32[1] = (V1.vector4_u32[1] == V2.vector4_u32[1]) ? 0xFFFFFFFF : 0;
1527    Control.vector4_u32[2] = (V1.vector4_u32[2] == V2.vector4_u32[2]) ? 0xFFFFFFFF : 0;
1528    Control.vector4_u32[3] = (V1.vector4_u32[3] == V2.vector4_u32[3]) ? 0xFFFFFFFF : 0;
1529
1530    return Control;
1531
1532#elif defined(_XM_SSE_INTRINSICS_)
1533	__m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
1534    return reinterpret_cast<__m128 *>(&V)[0];
1535#else // _XM_VMX128_INTRINSICS_
1536#endif // _XM_VMX128_INTRINSICS_
1537}
1538
1539//------------------------------------------------------------------------------
1540
1541XMFINLINE XMVECTOR XMVectorEqualIntR
1542(
1543    UINT*    pCR,
1544    FXMVECTOR V1,
1545    FXMVECTOR V2
1546)
1547{
1548#if defined(_XM_NO_INTRINSICS_)
1549
1550    XMVECTOR Control;
1551
1552    XMASSERT(pCR);
1553
1554    Control = XMVectorEqualInt(V1, V2);
1555
1556    *pCR = 0;
1557
1558    if (XMVector4EqualInt(Control, XMVectorTrueInt()))
1559    {
1560        // All elements are equal
1561        *pCR |= XM_CRMASK_CR6TRUE;
1562    }
1563    else if (XMVector4EqualInt(Control, XMVectorFalseInt()))
1564    {
1565        // All elements are not equal
1566        *pCR |= XM_CRMASK_CR6FALSE;
1567    }
1568
1569    return Control;
1570
1571#elif defined(_XM_SSE_INTRINSICS_)
1572    XMASSERT(pCR);
1573    __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
1574    int iTemp = _mm_movemask_ps(reinterpret_cast<const __m128*>(&V)[0]);
1575    UINT CR = 0;
1576    if (iTemp==0x0F)
1577    {
1578        CR = XM_CRMASK_CR6TRUE;
1579    }
1580    else if (!iTemp)
1581    {
1582        CR = XM_CRMASK_CR6FALSE;
1583    }
1584    *pCR = CR;
1585    return reinterpret_cast<__m128 *>(&V)[0];
1586#else // _XM_VMX128_INTRINSICS_
1587#endif // _XM_VMX128_INTRINSICS_
1588}
1589
1590//------------------------------------------------------------------------------
1591
1592XMFINLINE XMVECTOR XMVectorNearEqual
1593(
1594    FXMVECTOR V1,
1595    FXMVECTOR V2,
1596    FXMVECTOR Epsilon
1597)
1598{
1599#if defined(_XM_NO_INTRINSICS_)
1600
1601    FLOAT fDeltax, fDeltay, fDeltaz, fDeltaw;
1602    XMVECTOR Control;
1603
1604    fDeltax = V1.vector4_f32[0]-V2.vector4_f32[0];
1605    fDeltay = V1.vector4_f32[1]-V2.vector4_f32[1];
1606    fDeltaz = V1.vector4_f32[2]-V2.vector4_f32[2];
1607    fDeltaw = V1.vector4_f32[3]-V2.vector4_f32[3];
1608
1609    fDeltax = fabsf(fDeltax);
1610    fDeltay = fabsf(fDeltay);
1611    fDeltaz = fabsf(fDeltaz);
1612    fDeltaw = fabsf(fDeltaw);
1613
1614    Control.vector4_u32[0] = (fDeltax <= Epsilon.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
1615    Control.vector4_u32[1] = (fDeltay <= Epsilon.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
1616    Control.vector4_u32[2] = (fDeltaz <= Epsilon.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
1617    Control.vector4_u32[3] = (fDeltaw <= Epsilon.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
1618
1619    return Control;
1620
1621#elif defined(_XM_SSE_INTRINSICS_)
1622    // Get the difference
1623    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
1624    // Get the absolute value of the difference
1625    XMVECTOR vTemp = _mm_setzero_ps();
1626    vTemp = _mm_sub_ps(vTemp,vDelta);
1627    vTemp = _mm_max_ps(vTemp,vDelta);
1628    vTemp = _mm_cmple_ps(vTemp,Epsilon);
1629    return vTemp;
1630#else // _XM_VMX128_INTRINSICS_
1631#endif // _XM_VMX128_INTRINSICS_
1632}
1633
1634//------------------------------------------------------------------------------
1635
1636XMFINLINE XMVECTOR XMVectorNotEqual
1637(
1638    FXMVECTOR V1,
1639    FXMVECTOR V2
1640)
1641{
1642#if defined(_XM_NO_INTRINSICS_)
1643
1644    XMVECTOR Control;
1645    Control.vector4_u32[0] = (V1.vector4_f32[0] != V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
1646    Control.vector4_u32[1] = (V1.vector4_f32[1] != V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
1647    Control.vector4_u32[2] = (V1.vector4_f32[2] != V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
1648    Control.vector4_u32[3] = (V1.vector4_f32[3] != V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
1649    return Control;
1650
1651#elif defined(_XM_SSE_INTRINSICS_)
1652	return _mm_cmpneq_ps( V1, V2 );
1653#else // _XM_VMX128_INTRINSICS_
1654#endif // _XM_VMX128_INTRINSICS_
1655}
1656
1657//------------------------------------------------------------------------------
1658
1659XMFINLINE XMVECTOR XMVectorNotEqualInt
1660(
1661    FXMVECTOR V1,
1662    FXMVECTOR V2
1663)
1664{
1665#if defined(_XM_NO_INTRINSICS_)
1666
1667    XMVECTOR Control;
1668    Control.vector4_u32[0] = (V1.vector4_u32[0] != V2.vector4_u32[0]) ? 0xFFFFFFFFU : 0;
1669    Control.vector4_u32[1] = (V1.vector4_u32[1] != V2.vector4_u32[1]) ? 0xFFFFFFFFU : 0;
1670    Control.vector4_u32[2] = (V1.vector4_u32[2] != V2.vector4_u32[2]) ? 0xFFFFFFFFU : 0;
1671    Control.vector4_u32[3] = (V1.vector4_u32[3] != V2.vector4_u32[3]) ? 0xFFFFFFFFU : 0;
1672    return Control;
1673
1674#elif defined(_XM_SSE_INTRINSICS_)
1675    __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
1676    return _mm_xor_ps(reinterpret_cast<__m128 *>(&V)[0],g_XMNegOneMask);
1677#else // _XM_VMX128_INTRINSICS_
1678#endif // _XM_VMX128_INTRINSICS_
1679}
1680
1681//------------------------------------------------------------------------------
1682
1683XMFINLINE XMVECTOR XMVectorGreater
1684(
1685    FXMVECTOR V1,
1686    FXMVECTOR V2
1687)
1688{
1689#if defined(_XM_NO_INTRINSICS_)
1690
1691    XMVECTOR Control;
1692    Control.vector4_u32[0] = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
1693    Control.vector4_u32[1] = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
1694    Control.vector4_u32[2] = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
1695    Control.vector4_u32[3] = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
1696    return Control;
1697
1698#elif defined(_XM_SSE_INTRINSICS_)
1699	return _mm_cmpgt_ps( V1, V2 );
1700#else // _XM_VMX128_INTRINSICS_
1701#endif // _XM_VMX128_INTRINSICS_
1702}
1703
1704//------------------------------------------------------------------------------
1705
1706XMFINLINE XMVECTOR XMVectorGreaterR
1707(
1708    UINT*    pCR,
1709    FXMVECTOR V1,
1710    FXMVECTOR V2
1711)
1712{
1713#if defined(_XM_NO_INTRINSICS_)
1714    UINT ux, uy, uz, uw, CR;
1715    XMVECTOR Control;
1716
1717    XMASSERT( pCR );
1718
1719    ux = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
1720    uy = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
1721    uz = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
1722    uw = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
1723    CR = 0;
1724    if (ux&uy&uz&uw)
1725    {
1726        // All elements are greater
1727        CR = XM_CRMASK_CR6TRUE;
1728    }
1729    else if (!(ux|uy|uz|uw))
1730    {
1731        // All elements are not greater
1732        CR = XM_CRMASK_CR6FALSE;
1733    }
1734    *pCR = CR;
1735    Control.vector4_u32[0] = ux;
1736    Control.vector4_u32[1] = uy;
1737    Control.vector4_u32[2] = uz;
1738    Control.vector4_u32[3] = uw;
1739    return Control;
1740
1741#elif defined(_XM_SSE_INTRINSICS_)
1742    XMASSERT( pCR );
1743    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
1744    UINT CR = 0;
1745    int iTest = _mm_movemask_ps(vTemp);
1746    if (iTest==0xf)
1747    {
1748        CR = XM_CRMASK_CR6TRUE;
1749    }
1750    else if (!iTest)
1751    {
1752        // All elements are not greater
1753        CR = XM_CRMASK_CR6FALSE;
1754    }
1755    *pCR = CR;
1756    return vTemp;
1757#else // _XM_VMX128_INTRINSICS_
1758#endif // _XM_VMX128_INTRINSICS_
1759}
1760
1761//------------------------------------------------------------------------------
1762
1763XMFINLINE XMVECTOR XMVectorGreaterOrEqual
1764(
1765    FXMVECTOR V1,
1766    FXMVECTOR V2
1767)
1768{
1769#if defined(_XM_NO_INTRINSICS_)
1770
1771    XMVECTOR Control;
1772    Control.vector4_u32[0] = (V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
1773    Control.vector4_u32[1] = (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
1774    Control.vector4_u32[2] = (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
1775    Control.vector4_u32[3] = (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
1776    return Control;
1777
1778#elif defined(_XM_SSE_INTRINSICS_)
1779    return _mm_cmpge_ps( V1, V2 );
1780#else // _XM_VMX128_INTRINSICS_
1781#endif // _XM_VMX128_INTRINSICS_
1782}
1783
1784//------------------------------------------------------------------------------
1785
1786XMFINLINE XMVECTOR XMVectorGreaterOrEqualR
1787(
1788    UINT*    pCR,
1789    FXMVECTOR V1,
1790    FXMVECTOR V2
1791)
1792{
1793#if defined(_XM_NO_INTRINSICS_)
1794    UINT ux, uy, uz, uw, CR;
1795    XMVECTOR Control;
1796
1797    XMASSERT( pCR );
1798
1799    ux = (V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
1800    uy = (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
1801    uz = (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
1802    uw = (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
1803    CR = 0;
1804    if (ux&uy&uz&uw)
1805    {
1806        // All elements are greater
1807        CR = XM_CRMASK_CR6TRUE;
1808    }
1809    else if (!(ux|uy|uz|uw))
1810    {
1811        // All elements are not greater
1812        CR = XM_CRMASK_CR6FALSE;
1813    }
1814    *pCR = CR;
1815    Control.vector4_u32[0] = ux;
1816    Control.vector4_u32[1] = uy;
1817    Control.vector4_u32[2] = uz;
1818    Control.vector4_u32[3] = uw;
1819    return Control;
1820
1821#elif defined(_XM_SSE_INTRINSICS_)
1822    XMASSERT( pCR );
1823    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
1824    UINT CR = 0;
1825    int iTest = _mm_movemask_ps(vTemp);
1826    if (iTest==0xf)
1827    {
1828        CR = XM_CRMASK_CR6TRUE;
1829    }
1830    else if (!iTest)
1831    {
1832        // All elements are not greater
1833        CR = XM_CRMASK_CR6FALSE;
1834    }
1835    *pCR = CR;
1836    return vTemp;
1837#else // _XM_VMX128_INTRINSICS_
1838#endif // _XM_VMX128_INTRINSICS_
1839}
1840
1841//------------------------------------------------------------------------------
1842
1843XMFINLINE XMVECTOR XMVectorLess
1844(
1845    FXMVECTOR V1,
1846    FXMVECTOR V2
1847)
1848{
1849#if defined(_XM_NO_INTRINSICS_)
1850
1851    XMVECTOR Control;
1852    Control.vector4_u32[0] = (V1.vector4_f32[0] < V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
1853    Control.vector4_u32[1] = (V1.vector4_f32[1] < V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
1854    Control.vector4_u32[2] = (V1.vector4_f32[2] < V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
1855    Control.vector4_u32[3] = (V1.vector4_f32[3] < V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
1856    return Control;
1857
1858#elif defined(_XM_SSE_INTRINSICS_)
1859    return _mm_cmplt_ps( V1, V2 );
1860#else // _XM_VMX128_INTRINSICS_
1861#endif // _XM_VMX128_INTRINSICS_
1862}
1863
1864//------------------------------------------------------------------------------
1865
1866XMFINLINE XMVECTOR XMVectorLessOrEqual
1867(
1868    FXMVECTOR V1,
1869    FXMVECTOR V2
1870)
1871{
1872#if defined(_XM_NO_INTRINSICS_)
1873
1874    XMVECTOR Control;
1875    Control.vector4_u32[0] = (V1.vector4_f32[0] <= V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
1876    Control.vector4_u32[1] = (V1.vector4_f32[1] <= V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
1877    Control.vector4_u32[2] = (V1.vector4_f32[2] <= V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
1878    Control.vector4_u32[3] = (V1.vector4_f32[3] <= V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
1879    return Control;
1880
1881#elif defined(_XM_SSE_INTRINSICS_)
1882    return _mm_cmple_ps( V1, V2 );
1883#else // _XM_VMX128_INTRINSICS_
1884#endif // _XM_VMX128_INTRINSICS_
1885}
1886
1887//------------------------------------------------------------------------------
1888
1889XMFINLINE XMVECTOR XMVectorInBounds
1890(
1891    FXMVECTOR V,
1892    FXMVECTOR Bounds
1893)
1894{
1895#if defined(_XM_NO_INTRINSICS_)
1896
1897    XMVECTOR Control;
1898    Control.vector4_u32[0] = (V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) ? 0xFFFFFFFF : 0;
1899    Control.vector4_u32[1] = (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) ? 0xFFFFFFFF : 0;
1900    Control.vector4_u32[2] = (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) ? 0xFFFFFFFF : 0;
1901    Control.vector4_u32[3] = (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]) ? 0xFFFFFFFF : 0;
1902    return Control;
1903
1904#elif defined(_XM_SSE_INTRINSICS_)
1905    // Test if less than or equal
1906    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
1907    // Negate the bounds
1908    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
1909    // Test if greater or equal (Reversed)
1910    vTemp2 = _mm_cmple_ps(vTemp2,V);
1911    // Blend answers
1912    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
1913    return vTemp1;
1914#else // _XM_VMX128_INTRINSICS_
1915#endif // _XM_VMX128_INTRINSICS_
1916}
1917
1918//------------------------------------------------------------------------------
1919
1920XMFINLINE XMVECTOR XMVectorInBoundsR
1921(
1922    UINT*    pCR,
1923    FXMVECTOR V,
1924    FXMVECTOR Bounds
1925)
1926{
1927#if defined(_XM_NO_INTRINSICS_)
1928    UINT ux, uy, uz, uw, CR;
1929    XMVECTOR Control;
1930
1931    XMASSERT( pCR != 0 );
1932
1933    ux = (V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
1934    uy = (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
1935    uz = (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
1936    uw = (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
1937
1938    CR = 0;
1939
1940    if (ux&uy&uz&uw)
1941    {
1942        // All elements are in bounds
1943        CR = XM_CRMASK_CR6BOUNDS;
1944    }
1945    *pCR = CR;
1946    Control.vector4_u32[0] = ux;
1947    Control.vector4_u32[1] = uy;
1948    Control.vector4_u32[2] = uz;
1949    Control.vector4_u32[3] = uw;
1950    return Control;
1951
1952#elif defined(_XM_SSE_INTRINSICS_)
1953    XMASSERT( pCR != 0 );
1954    // Test if less than or equal
1955    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
1956    // Negate the bounds
1957    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
1958    // Test if greater or equal (Reversed)
1959    vTemp2 = _mm_cmple_ps(vTemp2,V);
1960    // Blend answers
1961    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
1962
1963    UINT CR = 0;
1964    if (_mm_movemask_ps(vTemp1)==0xf) {
1965        // All elements are in bounds
1966        CR = XM_CRMASK_CR6BOUNDS;
1967    }
1968    *pCR = CR;
1969    return vTemp1;
1970#else // _XM_VMX128_INTRINSICS_
1971#endif // _XM_VMX128_INTRINSICS_
1972}
1973
1974//------------------------------------------------------------------------------
1975
1976XMFINLINE XMVECTOR XMVectorIsNaN
1977(
1978    FXMVECTOR V
1979)
1980{
1981#if defined(_XM_NO_INTRINSICS_)
1982
1983    XMVECTOR Control;
1984    Control.vector4_u32[0] = XMISNAN(V.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
1985    Control.vector4_u32[1] = XMISNAN(V.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
1986    Control.vector4_u32[2] = XMISNAN(V.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
1987    Control.vector4_u32[3] = XMISNAN(V.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
1988    return Control;
1989
1990#elif defined(_XM_SSE_INTRINSICS_)
1991    // Mask off the exponent
1992    __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
1993    // Mask off the mantissa
1994    __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
1995    // Are any of the exponents == 0x7F800000?
1996    vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
1997    // Are any of the mantissa's zero? (SSE2 doesn't have a neq test)
1998    vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
1999    // Perform a not on the NaN test to be true on NON-zero mantissas
2000    vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
2001    // If any are NaN, the signs are true after the merge above
2002    return reinterpret_cast<const XMVECTOR *>(&vTempNan)[0];
2003#else // _XM_VMX128_INTRINSICS_
2004#endif // _XM_VMX128_INTRINSICS_
2005}
2006
2007//------------------------------------------------------------------------------
2008
2009XMFINLINE XMVECTOR XMVectorIsInfinite
2010(
2011    FXMVECTOR V
2012)
2013{
2014#if defined(_XM_NO_INTRINSICS_)
2015
2016    XMVECTOR Control;
2017    Control.vector4_u32[0] = XMISINF(V.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
2018    Control.vector4_u32[1] = XMISINF(V.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
2019    Control.vector4_u32[2] = XMISINF(V.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
2020    Control.vector4_u32[3] = XMISINF(V.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
2021    return Control;
2022
2023#elif defined(_XM_SSE_INTRINSICS_)
2024    // Mask off the sign bit
2025    __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
2026    // Compare to infinity
2027    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
2028    // If any are infinity, the signs are true.
2029    return vTemp;
2030#else // _XM_VMX128_INTRINSICS_
2031#endif // _XM_VMX128_INTRINSICS_
2032}
2033
2034//------------------------------------------------------------------------------
2035// Rounding and clamping operations
2036//------------------------------------------------------------------------------
2037
2038//------------------------------------------------------------------------------
2039
2040XMFINLINE XMVECTOR XMVectorMin
2041(
2042    FXMVECTOR V1,
2043    FXMVECTOR V2
2044)
2045{
2046#if defined(_XM_NO_INTRINSICS_)
2047
2048    XMVECTOR Result;
2049    Result.vector4_f32[0] = (V1.vector4_f32[0] < V2.vector4_f32[0]) ? V1.vector4_f32[0] : V2.vector4_f32[0];
2050    Result.vector4_f32[1] = (V1.vector4_f32[1] < V2.vector4_f32[1]) ? V1.vector4_f32[1] : V2.vector4_f32[1];
2051    Result.vector4_f32[2] = (V1.vector4_f32[2] < V2.vector4_f32[2]) ? V1.vector4_f32[2] : V2.vector4_f32[2];
2052    Result.vector4_f32[3] = (V1.vector4_f32[3] < V2.vector4_f32[3]) ? V1.vector4_f32[3] : V2.vector4_f32[3];
2053    return Result;
2054
2055#elif defined(_XM_SSE_INTRINSICS_)
2056	return _mm_min_ps( V1, V2 );
2057#else // _XM_VMX128_INTRINSICS_
2058#endif // _XM_VMX128_INTRINSICS_
2059}
2060
2061//------------------------------------------------------------------------------
2062
2063XMFINLINE XMVECTOR XMVectorMax
2064(
2065    FXMVECTOR V1,
2066    FXMVECTOR V2
2067)
2068{
2069#if defined(_XM_NO_INTRINSICS_)
2070
2071    XMVECTOR Result;
2072    Result.vector4_f32[0] = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? V1.vector4_f32[0] : V2.vector4_f32[0];
2073    Result.vector4_f32[1] = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? V1.vector4_f32[1] : V2.vector4_f32[1];
2074    Result.vector4_f32[2] = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? V1.vector4_f32[2] : V2.vector4_f32[2];
2075    Result.vector4_f32[3] = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? V1.vector4_f32[3] : V2.vector4_f32[3];
2076    return Result;
2077
2078#elif defined(_XM_SSE_INTRINSICS_)
2079	return _mm_max_ps( V1, V2 );
2080#else // _XM_VMX128_INTRINSICS_
2081#endif // _XM_VMX128_INTRINSICS_
2082}
2083
2084//------------------------------------------------------------------------------
2085
2086XMFINLINE XMVECTOR XMVectorRound
2087(
2088    FXMVECTOR V
2089)
2090{
2091#if defined(_XM_NO_INTRINSICS_)
2092
2093    XMVECTOR       Result;
2094    XMVECTOR       Bias;
2095    CONST XMVECTOR Zero = XMVectorZero();
2096    CONST XMVECTOR BiasPos = XMVectorReplicate(0.5f);
2097    CONST XMVECTOR BiasNeg = XMVectorReplicate(-0.5f);
2098
2099    Bias = XMVectorLess(V, Zero);
2100    Bias = XMVectorSelect(BiasPos, BiasNeg, Bias);
2101    Result = XMVectorAdd(V, Bias);
2102    Result = XMVectorTruncate(Result);
2103
2104    return Result;
2105
2106#elif defined(_XM_SSE_INTRINSICS_)
2107    // To handle NAN, INF and numbers greater than 8388608, use masking
2108    // Get the abs value
2109    __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask);
2110    // Test for greater than 8388608 (All floats with NO fractionals, NAN and INF
2111    vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
2112    // Convert to int and back to float for rounding
2113    __m128i vInt = _mm_cvtps_epi32(V);
2114    // Convert back to floats
2115    XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
2116    // All numbers less than 8388608 will use the round to int
2117    vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
2118    // All others, use the ORIGINAL value
2119    vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]);
2120    vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
2121    return vResult;
2122#else // _XM_VMX128_INTRINSICS_
2123#endif // _XM_VMX128_INTRINSICS_
2124}
2125
2126//------------------------------------------------------------------------------
2127
2128XMFINLINE XMVECTOR XMVectorTruncate
2129(
2130    FXMVECTOR V
2131)
2132{
2133#if defined(_XM_NO_INTRINSICS_)
2134    XMVECTOR Result;
2135    UINT     i;
2136
2137    // Avoid C4701
2138    Result.vector4_f32[0] = 0.0f;
2139
2140    for (i = 0; i < 4; i++)
2141    {
2142        if (XMISNAN(V.vector4_f32[i]))
2143        {
2144            Result.vector4_u32[i] = 0x7FC00000;
2145        }
2146        else if (fabsf(V.vector4_f32[i]) < 8388608.0f)
2147        {
2148            Result.vector4_f32[i] = (FLOAT)((INT)V.vector4_f32[i]);
2149        }
2150        else
2151        {
2152            Result.vector4_f32[i] = V.vector4_f32[i];
2153        }
2154    }
2155    return Result;
2156
2157#elif defined(_XM_SSE_INTRINSICS_)
2158    // To handle NAN, INF and numbers greater than 8388608, use masking
2159    // Get the abs value
2160    __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask);
2161    // Test for greater than 8388608 (All floats with NO fractionals, NAN and INF
2162    vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
2163    // Convert to int and back to float for rounding with truncation
2164    __m128i vInt = _mm_cvttps_epi32(V);
2165    // Convert back to floats
2166    XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
2167    // All numbers less than 8388608 will use the round to int
2168    vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
2169    // All others, use the ORIGINAL value
2170    vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]);
2171    vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
2172    return vResult;
2173#else // _XM_VMX128_INTRINSICS_
2174#endif // _XM_VMX128_INTRINSICS_
2175}
2176
2177//------------------------------------------------------------------------------
2178
2179XMFINLINE XMVECTOR XMVectorFloor
2180(
2181    FXMVECTOR V
2182)
2183{
2184#if defined(_XM_NO_INTRINSICS_)
2185
2186    XMVECTOR vResult = {
2187        floorf(V.vector4_f32[0]),
2188        floorf(V.vector4_f32[1]),
2189        floorf(V.vector4_f32[2]),
2190        floorf(V.vector4_f32[3])
2191    };
2192    return vResult;
2193
2194#elif defined(_XM_SSE_INTRINSICS_)
2195    XMVECTOR vResult = _mm_sub_ps(V,g_XMOneHalfMinusEpsilon);
2196    __m128i vInt = _mm_cvtps_epi32(vResult);
2197    vResult = _mm_cvtepi32_ps(vInt);
2198	return vResult;
2199#else // _XM_VMX128_INTRINSICS_
2200#endif // _XM_VMX128_INTRINSICS_
2201}
2202
2203//------------------------------------------------------------------------------
2204
2205XMFINLINE XMVECTOR XMVectorCeiling
2206(
2207    FXMVECTOR V
2208)
2209{
2210#if defined(_XM_NO_INTRINSICS_)
2211    XMVECTOR vResult = {
2212        ceilf(V.vector4_f32[0]),
2213        ceilf(V.vector4_f32[1]),
2214        ceilf(V.vector4_f32[2]),
2215        ceilf(V.vector4_f32[3])
2216    };
2217    return vResult;
2218
2219#elif defined(_XM_SSE_INTRINSICS_)
2220    XMVECTOR vResult = _mm_add_ps(V,g_XMOneHalfMinusEpsilon);
2221    __m128i vInt = _mm_cvtps_epi32(vResult);
2222    vResult = _mm_cvtepi32_ps(vInt);
2223	return vResult;
2224#else // _XM_VMX128_INTRINSICS_
2225#endif // _XM_VMX128_INTRINSICS_
2226}
2227
2228//------------------------------------------------------------------------------
2229
2230XMFINLINE XMVECTOR XMVectorClamp
2231(
2232    FXMVECTOR V,
2233    FXMVECTOR Min,
2234    FXMVECTOR Max
2235)
2236{
2237#if defined(_XM_NO_INTRINSICS_)
2238
2239    XMVECTOR Result;
2240
2241    XMASSERT(XMVector4LessOrEqual(Min, Max));
2242
2243    Result = XMVectorMax(Min, V);
2244    Result = XMVectorMin(Max, Result);
2245
2246    return Result;
2247
2248#elif defined(_XM_SSE_INTRINSICS_)
2249	XMVECTOR vResult;
2250	XMASSERT(XMVector4LessOrEqual(Min, Max));
2251	vResult = _mm_max_ps(Min,V);
2252	vResult = _mm_min_ps(vResult,Max);
2253	return vResult;
2254#else // _XM_VMX128_INTRINSICS_
2255#endif // _XM_VMX128_INTRINSICS_
2256}
2257
2258//------------------------------------------------------------------------------
2259
2260XMFINLINE XMVECTOR XMVectorSaturate
2261(
2262    FXMVECTOR V
2263)
2264{
2265#if defined(_XM_NO_INTRINSICS_)
2266
2267    CONST XMVECTOR Zero = XMVectorZero();
2268
2269    return XMVectorClamp(V, Zero, g_XMOne.v);
2270
2271#elif defined(_XM_SSE_INTRINSICS_)
2272    // Set <0 to 0
2273    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
2274    // Set>1 to 1
2275    return _mm_min_ps(vResult,g_XMOne);
2276#else // _XM_VMX128_INTRINSICS_
2277#endif // _XM_VMX128_INTRINSICS_
2278}
2279
2280//------------------------------------------------------------------------------
2281// Bitwise logical operations
2282//------------------------------------------------------------------------------
2283
2284XMFINLINE XMVECTOR XMVectorAndInt
2285(
2286    FXMVECTOR V1,
2287    FXMVECTOR V2
2288)
2289{
2290#if defined(_XM_NO_INTRINSICS_)
2291
2292    XMVECTOR Result;
2293
2294    Result.vector4_u32[0] = V1.vector4_u32[0] & V2.vector4_u32[0];
2295    Result.vector4_u32[1] = V1.vector4_u32[1] & V2.vector4_u32[1];
2296    Result.vector4_u32[2] = V1.vector4_u32[2] & V2.vector4_u32[2];
2297    Result.vector4_u32[3] = V1.vector4_u32[3] & V2.vector4_u32[3];
2298    return Result;
2299
2300#elif defined(_XM_SSE_INTRINSICS_)
2301    return _mm_and_ps(V1,V2);
2302#else // _XM_VMX128_INTRINSICS_
2303#endif // _XM_VMX128_INTRINSICS_
2304}
2305
2306//------------------------------------------------------------------------------
2307
2308XMFINLINE XMVECTOR XMVectorAndCInt
2309(
2310    FXMVECTOR V1,
2311    FXMVECTOR V2
2312)
2313{
2314#if defined(_XM_NO_INTRINSICS_)
2315
2316    XMVECTOR Result;
2317
2318    Result.vector4_u32[0] = V1.vector4_u32[0] & ~V2.vector4_u32[0];
2319    Result.vector4_u32[1] = V1.vector4_u32[1] & ~V2.vector4_u32[1];
2320    Result.vector4_u32[2] = V1.vector4_u32[2] & ~V2.vector4_u32[2];
2321    Result.vector4_u32[3] = V1.vector4_u32[3] & ~V2.vector4_u32[3];
2322
2323    return Result;
2324
2325#elif defined(_XM_SSE_INTRINSICS_)
2326    __m128i V = _mm_andnot_si128( reinterpret_cast<const __m128i *>(&V2)[0], reinterpret_cast<const __m128i *>(&V1)[0] );
2327    return reinterpret_cast<__m128 *>(&V)[0];
2328#else // _XM_VMX128_INTRINSICS_
2329#endif // _XM_VMX128_INTRINSICS_
2330}
2331
2332//------------------------------------------------------------------------------
2333
2334XMFINLINE XMVECTOR XMVectorOrInt
2335(
2336    FXMVECTOR V1,
2337    FXMVECTOR V2
2338)
2339{
2340#if defined(_XM_NO_INTRINSICS_)
2341
2342    XMVECTOR Result;
2343
2344    Result.vector4_u32[0] = V1.vector4_u32[0] | V2.vector4_u32[0];
2345    Result.vector4_u32[1] = V1.vector4_u32[1] | V2.vector4_u32[1];
2346    Result.vector4_u32[2] = V1.vector4_u32[2] | V2.vector4_u32[2];
2347    Result.vector4_u32[3] = V1.vector4_u32[3] | V2.vector4_u32[3];
2348
2349    return Result;
2350
2351#elif defined(_XM_SSE_INTRINSICS_)
2352    __m128i V = _mm_or_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
2353    return reinterpret_cast<__m128 *>(&V)[0];
2354#else // _XM_VMX128_INTRINSICS_
2355#endif // _XM_VMX128_INTRINSICS_
2356}
2357
2358//------------------------------------------------------------------------------
2359
2360XMFINLINE XMVECTOR XMVectorNorInt
2361(
2362    FXMVECTOR V1,
2363    FXMVECTOR V2
2364)
2365{
2366#if defined(_XM_NO_INTRINSICS_)
2367
2368    XMVECTOR Result;
2369
2370    Result.vector4_u32[0] = ~(V1.vector4_u32[0] | V2.vector4_u32[0]);
2371    Result.vector4_u32[1] = ~(V1.vector4_u32[1] | V2.vector4_u32[1]);
2372    Result.vector4_u32[2] = ~(V1.vector4_u32[2] | V2.vector4_u32[2]);
2373    Result.vector4_u32[3] = ~(V1.vector4_u32[3] | V2.vector4_u32[3]);
2374
2375    return Result;
2376
2377#elif defined(_XM_SSE_INTRINSICS_)
2378    __m128i Result;
2379    Result = _mm_or_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
2380    Result = _mm_andnot_si128( Result,g_XMNegOneMask);
2381    return reinterpret_cast<__m128 *>(&Result)[0];
2382#else // _XM_VMX128_INTRINSICS_
2383#endif // _XM_VMX128_INTRINSICS_
2384}
2385
2386//------------------------------------------------------------------------------
2387
2388XMFINLINE XMVECTOR XMVectorXorInt
2389(
2390    FXMVECTOR V1,
2391    FXMVECTOR V2
2392)
2393{
2394#if defined(_XM_NO_INTRINSICS_)
2395
2396    XMVECTOR Result;
2397
2398    Result.vector4_u32[0] = V1.vector4_u32[0] ^ V2.vector4_u32[0];
2399    Result.vector4_u32[1] = V1.vector4_u32[1] ^ V2.vector4_u32[1];
2400    Result.vector4_u32[2] = V1.vector4_u32[2] ^ V2.vector4_u32[2];
2401    Result.vector4_u32[3] = V1.vector4_u32[3] ^ V2.vector4_u32[3];
2402
2403    return Result;
2404
2405#elif defined(_XM_SSE_INTRINSICS_)
2406	__m128i V = _mm_xor_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
2407    return reinterpret_cast<__m128 *>(&V)[0];
2408#else // _XM_VMX128_INTRINSICS_
2409#endif // _XM_VMX128_INTRINSICS_
2410}
2411
2412//------------------------------------------------------------------------------
2413// Computation operations
2414//------------------------------------------------------------------------------
2415
2416//------------------------------------------------------------------------------
2417
2418XMFINLINE XMVECTOR XMVectorNegate
2419(
2420    FXMVECTOR V
2421)
2422{
2423#if defined(_XM_NO_INTRINSICS_)
2424
2425    XMVECTOR Result;
2426
2427    Result.vector4_f32[0] = -V.vector4_f32[0];
2428    Result.vector4_f32[1] = -V.vector4_f32[1];
2429    Result.vector4_f32[2] = -V.vector4_f32[2];
2430    Result.vector4_f32[3] = -V.vector4_f32[3];
2431
2432    return Result;
2433
2434#elif defined(_XM_SSE_INTRINSICS_)
2435	XMVECTOR Z;
2436
2437	Z = _mm_setzero_ps();
2438
2439	return _mm_sub_ps( Z, V );
2440#else // _XM_VMX128_INTRINSICS_
2441#endif // _XM_VMX128_INTRINSICS_
2442}
2443
2444//------------------------------------------------------------------------------
2445
2446XMFINLINE XMVECTOR XMVectorAdd
2447(
2448    FXMVECTOR V1,
2449    FXMVECTOR V2
2450)
2451{
2452#if defined(_XM_NO_INTRINSICS_)
2453
2454    XMVECTOR Result;
2455
2456    Result.vector4_f32[0] = V1.vector4_f32[0] + V2.vector4_f32[0];
2457    Result.vector4_f32[1] = V1.vector4_f32[1] + V2.vector4_f32[1];
2458    Result.vector4_f32[2] = V1.vector4_f32[2] + V2.vector4_f32[2];
2459    Result.vector4_f32[3] = V1.vector4_f32[3] + V2.vector4_f32[3];
2460
2461    return Result;
2462
2463#elif defined(_XM_SSE_INTRINSICS_)
2464	return _mm_add_ps( V1, V2 );
2465#else // _XM_VMX128_INTRINSICS_
2466#endif // _XM_VMX128_INTRINSICS_
2467}
2468
2469//------------------------------------------------------------------------------
2470
2471XMFINLINE XMVECTOR XMVectorAddAngles
2472(
2473    FXMVECTOR V1,
2474    FXMVECTOR V2
2475)
2476{
2477#if defined(_XM_NO_INTRINSICS_)
2478
2479    XMVECTOR       Mask;
2480    XMVECTOR       Offset;
2481    XMVECTOR       Result;
2482    CONST XMVECTOR Zero = XMVectorZero();
2483
2484    // Add the given angles together.  If the range of V1 is such
2485    // that -Pi <= V1 < Pi and the range of V2 is such that
2486    // -2Pi <= V2 <= 2Pi, then the range of the resulting angle
2487    // will be -Pi <= Result < Pi.
2488    Result = XMVectorAdd(V1, V2);
2489
2490    Mask = XMVectorLess(Result, g_XMNegativePi.v);
2491    Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask);
2492
2493    Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v);
2494    Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask);
2495
2496    Result = XMVectorAdd(Result, Offset);
2497
2498    return Result;
2499
2500#elif defined(_XM_SSE_INTRINSICS_)
2501    // Adjust the angles
2502    XMVECTOR vResult = _mm_add_ps(V1,V2);
2503    // Less than Pi?
2504    XMVECTOR vOffset = _mm_cmplt_ps(vResult,g_XMNegativePi);
2505    vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
2506    // Add 2Pi to all entries less than -Pi
2507    vResult = _mm_add_ps(vResult,vOffset);
2508    // Greater than or equal to Pi?
2509    vOffset = _mm_cmpge_ps(vResult,g_XMPi);
2510    vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
2511    // Sub 2Pi to all entries greater than Pi
2512    vResult = _mm_sub_ps(vResult,vOffset);
2513    return vResult;
2514#else // _XM_VMX128_INTRINSICS_
2515#endif // _XM_VMX128_INTRINSICS_
2516}
2517
2518//------------------------------------------------------------------------------
2519
2520XMFINLINE XMVECTOR XMVectorSubtract
2521(
2522    FXMVECTOR V1,
2523    FXMVECTOR V2
2524)
2525{
2526#if defined(_XM_NO_INTRINSICS_)
2527
2528    XMVECTOR Result;
2529
2530    Result.vector4_f32[0] = V1.vector4_f32[0] - V2.vector4_f32[0];
2531    Result.vector4_f32[1] = V1.vector4_f32[1] - V2.vector4_f32[1];
2532    Result.vector4_f32[2] = V1.vector4_f32[2] - V2.vector4_f32[2];
2533    Result.vector4_f32[3] = V1.vector4_f32[3] - V2.vector4_f32[3];
2534
2535    return Result;
2536
2537#elif defined(_XM_SSE_INTRINSICS_)
2538	return _mm_sub_ps( V1, V2 );
2539#else // _XM_VMX128_INTRINSICS_
2540#endif // _XM_VMX128_INTRINSICS_
2541}
2542
2543//------------------------------------------------------------------------------
2544
2545XMFINLINE XMVECTOR XMVectorSubtractAngles
2546(
2547    FXMVECTOR V1,
2548    FXMVECTOR V2
2549)
2550{
2551#if defined(_XM_NO_INTRINSICS_)
2552
2553    XMVECTOR       Mask;
2554    XMVECTOR       Offset;
2555    XMVECTOR       Result;
2556    CONST XMVECTOR Zero = XMVectorZero();
2557
2558    // Subtract the given angles.  If the range of V1 is such
2559    // that -Pi <= V1 < Pi and the range of V2 is such that
2560    // -2Pi <= V2 <= 2Pi, then the range of the resulting angle
2561    // will be -Pi <= Result < Pi.
2562    Result = XMVectorSubtract(V1, V2);
2563
2564    Mask = XMVectorLess(Result, g_XMNegativePi.v);
2565    Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask);
2566
2567    Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v);
2568    Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask);
2569
2570    Result = XMVectorAdd(Result, Offset);
2571
2572    return Result;
2573
2574#elif defined(_XM_SSE_INTRINSICS_)
2575    // Adjust the angles
2576    XMVECTOR vResult = _mm_sub_ps(V1,V2);
2577    // Less than Pi?
2578    XMVECTOR vOffset = _mm_cmplt_ps(vResult,g_XMNegativePi);
2579    vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
2580    // Add 2Pi to all entries less than -Pi
2581    vResult = _mm_add_ps(vResult,vOffset);
2582    // Greater than or equal to Pi?
2583    vOffset = _mm_cmpge_ps(vResult,g_XMPi);
2584    vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
2585    // Sub 2Pi to all entries greater than Pi
2586    vResult = _mm_sub_ps(vResult,vOffset);
2587    return vResult;
2588#else // _XM_VMX128_INTRINSICS_
2589#endif // _XM_VMX128_INTRINSICS_
2590}
2591
2592//------------------------------------------------------------------------------
2593
2594XMFINLINE XMVECTOR XMVectorMultiply
2595(
2596    FXMVECTOR V1,
2597    FXMVECTOR V2
2598)
2599{
2600#if defined(_XM_NO_INTRINSICS_)
2601    XMVECTOR Result = {
2602        V1.vector4_f32[0] * V2.vector4_f32[0],
2603        V1.vector4_f32[1] * V2.vector4_f32[1],
2604        V1.vector4_f32[2] * V2.vector4_f32[2],
2605        V1.vector4_f32[3] * V2.vector4_f32[3]
2606    };
2607    return Result;
2608#elif defined(_XM_SSE_INTRINSICS_)
2609	return _mm_mul_ps( V1, V2 );
2610#else // _XM_VMX128_INTRINSICS_
2611#endif // _XM_VMX128_INTRINSICS_
2612}
2613
2614//------------------------------------------------------------------------------
2615
2616XMFINLINE XMVECTOR XMVectorMultiplyAdd
2617(
2618    FXMVECTOR V1,
2619    FXMVECTOR V2,
2620    FXMVECTOR V3
2621)
2622{
2623#if defined(_XM_NO_INTRINSICS_)
2624    XMVECTOR vResult = {
2625        (V1.vector4_f32[0] * V2.vector4_f32[0]) + V3.vector4_f32[0],
2626        (V1.vector4_f32[1] * V2.vector4_f32[1]) + V3.vector4_f32[1],
2627        (V1.vector4_f32[2] * V2.vector4_f32[2]) + V3.vector4_f32[2],
2628        (V1.vector4_f32[3] * V2.vector4_f32[3]) + V3.vector4_f32[3]
2629    };
2630    return vResult;
2631
2632#elif defined(_XM_SSE_INTRINSICS_)
2633	XMVECTOR vResult = _mm_mul_ps( V1, V2 );
2634	return _mm_add_ps(vResult, V3 );
2635#else // _XM_VMX128_INTRINSICS_
2636#endif // _XM_VMX128_INTRINSICS_
2637}
2638
2639//------------------------------------------------------------------------------
2640
2641XMFINLINE XMVECTOR XMVectorDivide
2642(
2643    FXMVECTOR V1,
2644    FXMVECTOR V2
2645)
2646{
2647#if defined(_XM_NO_INTRINSICS_)
2648    XMVECTOR Result;
2649    Result.vector4_f32[0] = V1.vector4_f32[0] / V2.vector4_f32[0];
2650    Result.vector4_f32[1] = V1.vector4_f32[1] / V2.vector4_f32[1];
2651    Result.vector4_f32[2] = V1.vector4_f32[2] / V2.vector4_f32[2];
2652    Result.vector4_f32[3] = V1.vector4_f32[3] / V2.vector4_f32[3];
2653    return Result;
2654#elif defined(_XM_SSE_INTRINSICS_)
2655    return _mm_div_ps( V1, V2 );
2656#else // _XM_VMX128_INTRINSICS_
2657#endif // _XM_VMX128_INTRINSICS_
2658}
2659
2660//------------------------------------------------------------------------------
2661
2662XMFINLINE XMVECTOR XMVectorNegativeMultiplySubtract
2663(
2664    FXMVECTOR V1,
2665    FXMVECTOR V2,
2666    FXMVECTOR V3
2667)
2668{
2669#if defined(_XM_NO_INTRINSICS_)
2670
2671    XMVECTOR vResult = {
2672        V3.vector4_f32[0] - (V1.vector4_f32[0] * V2.vector4_f32[0]),
2673        V3.vector4_f32[1] - (V1.vector4_f32[1] * V2.vector4_f32[1]),
2674        V3.vector4_f32[2] - (V1.vector4_f32[2] * V2.vector4_f32[2]),
2675        V3.vector4_f32[3] - (V1.vector4_f32[3] * V2.vector4_f32[3])
2676    };
2677    return vResult;
2678
2679#elif defined(_XM_SSE_INTRINSICS_)
2680	XMVECTOR R = _mm_mul_ps( V1, V2 );
2681	return _mm_sub_ps( V3, R );
2682#else // _XM_VMX128_INTRINSICS_
2683#endif // _XM_VMX128_INTRINSICS_
2684}
2685
2686//------------------------------------------------------------------------------
2687
2688XMFINLINE XMVECTOR XMVectorScale
2689(
2690    FXMVECTOR V,
2691    FLOAT    ScaleFactor
2692)
2693{
2694#if defined(_XM_NO_INTRINSICS_)
2695    XMVECTOR vResult = {
2696        V.vector4_f32[0] * ScaleFactor,
2697        V.vector4_f32[1] * ScaleFactor,
2698        V.vector4_f32[2] * ScaleFactor,
2699        V.vector4_f32[3] * ScaleFactor
2700    };
2701    return vResult;
2702
2703#elif defined(_XM_SSE_INTRINSICS_)
2704   XMVECTOR vResult = _mm_set_ps1(ScaleFactor);
2705   return _mm_mul_ps(vResult,V);
2706#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
2707#endif // _XM_VMX128_INTRINSICS_
2708}
2709
2710//------------------------------------------------------------------------------
2711
2712XMFINLINE XMVECTOR XMVectorReciprocalEst
2713(
2714    FXMVECTOR V
2715)
2716{
2717#if defined(_XM_NO_INTRINSICS_)
2718    XMVECTOR Result;
2719    UINT     i;
2720
2721    // Avoid C4701
2722    Result.vector4_f32[0] = 0.0f;
2723
2724    for (i = 0; i < 4; i++)
2725    {
2726        if (XMISNAN(V.vector4_f32[i]))
2727        {
2728            Result.vector4_u32[i] = 0x7FC00000;
2729        }
2730        else if (V.vector4_f32[i] == 0.0f || V.vector4_f32[i] == -0.0f)
2731        {
2732            Result.vector4_u32[i] = 0x7F800000 | (V.vector4_u32[i] & 0x80000000);
2733        }
2734        else
2735        {
2736            Result.vector4_f32[i] = 1.f / V.vector4_f32[i];
2737        }
2738    }
2739    return Result;
2740
2741#elif defined(_XM_SSE_INTRINSICS_)
2742	return _mm_rcp_ps(V);
2743#else // _XM_VMX128_INTRINSICS_
2744#endif // _XM_VMX128_INTRINSICS_
2745}
2746
2747//------------------------------------------------------------------------------
2748
2749XMFINLINE XMVECTOR XMVectorReciprocal
2750(
2751    FXMVECTOR V
2752)
2753{
2754#if defined(_XM_NO_INTRINSICS_)
2755    return XMVectorReciprocalEst(V);
2756
2757#elif defined(_XM_SSE_INTRINSICS_)
2758    return _mm_div_ps(g_XMOne,V);
2759#else // _XM_VMX128_INTRINSICS_
2760#endif // _XM_VMX128_INTRINSICS_
2761}
2762
2763//------------------------------------------------------------------------------
2764// Return an estimated square root
2765XMFINLINE XMVECTOR XMVectorSqrtEst
2766(
2767    FXMVECTOR V
2768)
2769{
2770#if defined(_XM_NO_INTRINSICS_)
2771    XMVECTOR Select;
2772
2773    // if (x == +Infinity)  sqrt(x) = +Infinity
2774    // if (x == +0.0f)      sqrt(x) = +0.0f
2775    // if (x == -0.0f)      sqrt(x) = -0.0f
2776    // if (x < 0.0f)        sqrt(x) = QNaN
2777
2778    XMVECTOR Result = XMVectorReciprocalSqrtEst(V);
2779    XMVECTOR Zero = XMVectorZero();
2780    XMVECTOR VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v);
2781    XMVECTOR VEqualsZero = XMVectorEqual(V, Zero);
2782    Result = XMVectorMultiply(V, Result);
2783    Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
2784    Result = XMVectorSelect(V, Result, Select);
2785    return Result;
2786
2787#elif defined(_XM_SSE_INTRINSICS_)
2788	return _mm_sqrt_ps(V);
2789#else // _XM_VMX128_INTRINSICS_
2790#endif // _XM_VMX128_INTRINSICS_
2791}
2792
2793//------------------------------------------------------------------------------
2794
2795XMFINLINE XMVECTOR XMVectorSqrt
2796(
2797    FXMVECTOR V
2798)
2799{
2800#if defined(_XM_NO_INTRINSICS_)
2801
2802    XMVECTOR Zero;
2803    XMVECTOR VEqualsInfinity, VEqualsZero;
2804    XMVECTOR Select;
2805    XMVECTOR Result;
2806
2807    // if (x == +Infinity)  sqrt(x) = +Infinity
2808    // if (x == +0.0f)      sqrt(x) = +0.0f
2809    // if (x == -0.0f)      sqrt(x) = -0.0f
2810    // if (x < 0.0f)        sqrt(x) = QNaN
2811
2812    Result = XMVectorReciprocalSqrt(V);
2813    Zero = XMVectorZero();
2814    VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v);
2815    VEqualsZero = XMVectorEqual(V, Zero);
2816    Result = XMVectorMultiply(V, Result);
2817    Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
2818    Result = XMVectorSelect(V, Result, Select);
2819
2820    return Result;
2821
2822#elif defined(_XM_SSE_INTRINSICS_)
2823	return _mm_sqrt_ps(V);
2824#else // _XM_VMX128_INTRINSICS_
2825#endif // _XM_VMX128_INTRINSICS_
2826}
2827
2828//------------------------------------------------------------------------------
2829
2830XMFINLINE XMVECTOR XMVectorReciprocalSqrtEst
2831(
2832    FXMVECTOR V
2833)
2834{
2835#if defined(_XM_NO_INTRINSICS_)
2836
2837    // if (x == +Infinity)  rsqrt(x) = 0
2838    // if (x == +0.0f)      rsqrt(x) = +Infinity
2839    // if (x == -0.0f)      rsqrt(x) = -Infinity
2840    // if (x < 0.0f)        rsqrt(x) = QNaN
2841
2842    XMVECTOR Result;
2843    UINT     i;
2844
2845    // Avoid C4701
2846    Result.vector4_f32[0] = 0.0f;
2847
2848    for (i = 0; i < 4; i++)
2849    {
2850        if (XMISNAN(V.vector4_f32[i]))
2851        {
2852            Result.vector4_u32[i] = 0x7FC00000;
2853        }
2854        else if (V.vector4_f32[i] == 0.0f || V.vector4_f32[i] == -0.0f)
2855        {
2856            Result.vector4_u32[i] = 0x7F800000 | (V.vector4_u32[i] & 0x80000000);
2857        }
2858        else if (V.vector4_f32[i] < 0.0f)
2859        {
2860            Result.vector4_u32[i] = 0x7FFFFFFF;
2861        }
2862        else if (XMISINF(V.vector4_f32[i]))
2863        {
2864            Result.vector4_f32[i] = 0.0f;
2865        }
2866        else
2867        {
2868            Result.vector4_f32[i] = 1.0f / sqrtf(V.vector4_f32[i]);
2869        }
2870    }
2871
2872    return Result;
2873
2874#elif defined(_XM_SSE_INTRINSICS_)
2875	return _mm_rsqrt_ps(V);
2876#else // _XM_VMX128_INTRINSICS_
2877#endif // _XM_VMX128_INTRINSICS_
2878}
2879
2880//------------------------------------------------------------------------------
2881
2882XMFINLINE XMVECTOR XMVectorReciprocalSqrt
2883(
2884    FXMVECTOR V
2885)
2886{
2887#if defined(_XM_NO_INTRINSICS_)
2888
2889    return XMVectorReciprocalSqrtEst(V);
2890
2891#elif defined(_XM_SSE_INTRINSICS_)
2892    XMVECTOR vResult = _mm_sqrt_ps(V);
2893    vResult = _mm_div_ps(g_XMOne,vResult);
2894    return vResult;
2895#else // _XM_VMX128_INTRINSICS_
2896#endif // _XM_VMX128_INTRINSICS_
2897}
2898
2899//------------------------------------------------------------------------------
2900
2901XMFINLINE XMVECTOR XMVectorExpEst
2902(
2903    FXMVECTOR V
2904)
2905{
2906#if defined(_XM_NO_INTRINSICS_)
2907
2908    XMVECTOR Result;
2909    Result.vector4_f32[0] = powf(2.0f, V.vector4_f32[0]);
2910    Result.vector4_f32[1] = powf(2.0f, V.vector4_f32[1]);
2911    Result.vector4_f32[2] = powf(2.0f, V.vector4_f32[2]);
2912    Result.vector4_f32[3] = powf(2.0f, V.vector4_f32[3]);
2913    return Result;
2914
2915#elif defined(_XM_SSE_INTRINSICS_)
2916    XMVECTOR vResult = _mm_setr_ps(
2917        powf(2.0f,XMVectorGetX(V)),
2918        powf(2.0f,XMVectorGetY(V)),
2919        powf(2.0f,XMVectorGetZ(V)),
2920        powf(2.0f,XMVectorGetW(V)));
2921    return vResult;
2922#else // _XM_VMX128_INTRINSICS_
2923#endif // _XM_VMX128_INTRINSICS_
2924}
2925
2926//------------------------------------------------------------------------------
2927
2928XMINLINE XMVECTOR XMVectorExp
2929(
2930    FXMVECTOR V
2931)
2932{
2933#if defined(_XM_NO_INTRINSICS_)
2934
2935    XMVECTOR               E, S;
2936    XMVECTOR               R, R2, R3, R4;
2937    XMVECTOR               V0, V1;
2938    XMVECTOR               C0X, C0Y, C0Z, C0W;
2939    XMVECTOR               C1X, C1Y, C1Z, C1W;
2940    XMVECTOR               Result;
2941    static CONST XMVECTOR  C0 = {1.0f, -6.93147182e-1f, 2.40226462e-1f, -5.55036440e-2f};
2942    static CONST XMVECTOR  C1 = {9.61597636e-3f, -1.32823968e-3f, 1.47491097e-4f, -1.08635004e-5f};
2943
2944    R = XMVectorFloor(V);
2945    E = XMVectorExpEst(R);
2946    R = XMVectorSubtract(V, R);
2947    R2 = XMVectorMultiply(R, R);
2948    R3 = XMVectorMultiply(R, R2);
2949    R4 = XMVectorMultiply(R2, R2);
2950
2951    C0X = XMVectorSplatX(C0);
2952    C0Y = XMVectorSplatY(C0);
2953    C0Z = XMVectorSplatZ(C0);
2954    C0W = XMVectorSplatW(C0);
2955
2956    C1X = XMVectorSplatX(C1);
2957    C1Y = XMVectorSplatY(C1);
2958    C1Z = XMVectorSplatZ(C1);
2959    C1W = XMVectorSplatW(C1);
2960
2961    V0 = XMVectorMultiplyAdd(R, C0Y, C0X);
2962    V0 = XMVectorMultiplyAdd(R2, C0Z, V0);
2963    V0 = XMVectorMultiplyAdd(R3, C0W, V0);
2964
2965    V1 = XMVectorMultiplyAdd(R, C1Y, C1X);
2966    V1 = XMVectorMultiplyAdd(R2, C1Z, V1);
2967    V1 = XMVectorMultiplyAdd(R3, C1W, V1);
2968
2969    S = XMVectorMultiplyAdd(R4, V1, V0);
2970
2971    S = XMVectorReciprocal(S);
2972    Result = XMVectorMultiply(E, S);
2973
2974    return Result;
2975
2976#elif defined(_XM_SSE_INTRINSICS_)
2977    static CONST XMVECTORF32 C0 = {1.0f, -6.93147182e-1f, 2.40226462e-1f, -5.55036440e-2f};
2978    static CONST XMVECTORF32 C1 = {9.61597636e-3f, -1.32823968e-3f, 1.47491097e-4f, -1.08635004e-5f};
2979
2980    // Get the integer of the input
2981    XMVECTOR R = XMVectorFloor(V);
2982    // Get the exponent estimate
2983    XMVECTOR E = XMVectorExpEst(R);
2984    // Get the fractional only
2985    R = _mm_sub_ps(V,R);
2986    // Get R^2
2987    XMVECTOR R2 = _mm_mul_ps(R,R);
2988    // And R^3
2989    XMVECTOR R3 = _mm_mul_ps(R,R2);
2990
2991    XMVECTOR V0 = _mm_load_ps1(&C0.f[1]);
2992    V0 = _mm_mul_ps(V0,R);
2993    XMVECTOR vConstants = _mm_load_ps1(&C0.f[0]);
2994    V0 = _mm_add_ps(V0,vConstants);
2995    vConstants = _mm_load_ps1(&C0.f[2]);
2996    vConstants = _mm_mul_ps(vConstants,R2);
2997    V0 = _mm_add_ps(V0,vConstants);
2998    vConstants = _mm_load_ps1(&C0.f[3]);
2999    vConstants = _mm_mul_ps(vConstants,R3);
3000    V0 = _mm_add_ps(V0,vConstants);
3001
3002    XMVECTOR V1 = _mm_load_ps1(&C1.f[1]);
3003    V1 = _mm_mul_ps(V1,R);
3004    vConstants = _mm_load_ps1(&C1.f[0]);
3005    V1 = _mm_add_ps(V1,vConstants);
3006    vConstants = _mm_load_ps1(&C1.f[2]);
3007    vConstants = _mm_mul_ps(vConstants,R2);
3008    V1 = _mm_add_ps(V1,vConstants);
3009    vConstants = _mm_load_ps1(&C1.f[3]);
3010    vConstants = _mm_mul_ps(vConstants,R3);
3011    V1 = _mm_add_ps(V1,vConstants);
3012    // R2 = R^4
3013    R2 = _mm_mul_ps(R2,R2);
3014    R2 = _mm_mul_ps(R2,V1);
3015    R2 = _mm_add_ps(R2,V0);
3016    E = _mm_div_ps(E,R2);
3017    return E;
3018#else // _XM_VMX128_INTRINSICS_
3019#endif // _XM_VMX128_INTRINSICS_
3020}
3021
3022//------------------------------------------------------------------------------
3023
3024XMFINLINE XMVECTOR XMVectorLogEst
3025(
3026    FXMVECTOR V
3027)
3028{
3029#if defined(_XM_NO_INTRINSICS_)
3030
3031    FLOAT fScale = (1.0f / logf(2.0f));
3032    XMVECTOR Result;
3033
3034    Result.vector4_f32[0] = logf(V.vector4_f32[0])*fScale;
3035    Result.vector4_f32[1] = logf(V.vector4_f32[1])*fScale;
3036    Result.vector4_f32[2] = logf(V.vector4_f32[2])*fScale;
3037    Result.vector4_f32[3] = logf(V.vector4_f32[3])*fScale;
3038    return Result;
3039
3040#elif defined(_XM_SSE_INTRINSICS_)
3041    XMVECTOR vScale = _mm_set_ps1(1.0f / logf(2.0f));
3042    XMVECTOR vResult = _mm_setr_ps(
3043        logf(XMVectorGetX(V)),
3044        logf(XMVectorGetY(V)),
3045        logf(XMVectorGetZ(V)),
3046        logf(XMVectorGetW(V)));
3047    vResult = _mm_mul_ps(vResult,vScale);
3048    return vResult;
3049#else // _XM_VMX128_INTRINSICS_
3050#endif // _XM_VMX128_INTRINSICS_
3051}
3052
3053//------------------------------------------------------------------------------
3054
3055XMINLINE XMVECTOR XMVectorLog
3056(
3057    FXMVECTOR V
3058)
3059{
3060#if defined(_XM_NO_INTRINSICS_)
3061    FLOAT fScale = (1.0f / logf(2.0f));
3062    XMVECTOR Result;
3063
3064    Result.vector4_f32[0] = logf(V.vector4_f32[0])*fScale;
3065    Result.vector4_f32[1] = logf(V.vector4_f32[1])*fScale;
3066    Result.vector4_f32[2] = logf(V.vector4_f32[2])*fScale;
3067    Result.vector4_f32[3] = logf(V.vector4_f32[3])*fScale;
3068    return Result;
3069
3070#elif defined(_XM_SSE_INTRINSICS_)
3071    XMVECTOR vScale = _mm_set_ps1(1.0f / logf(2.0f));
3072    XMVECTOR vResult = _mm_setr_ps(
3073        logf(XMVectorGetX(V)),
3074        logf(XMVectorGetY(V)),
3075        logf(XMVectorGetZ(V)),
3076        logf(XMVectorGetW(V)));
3077    vResult = _mm_mul_ps(vResult,vScale);
3078    return vResult;
3079#else // _XM_VMX128_INTRINSICS_
3080#endif // _XM_VMX128_INTRINSICS_
3081}
3082
3083//------------------------------------------------------------------------------
3084
3085XMFINLINE XMVECTOR XMVectorPowEst
3086(
3087    FXMVECTOR V1,
3088    FXMVECTOR V2
3089)
3090{
3091#if defined(_XM_NO_INTRINSICS_)
3092
3093    XMVECTOR Result;
3094
3095    Result.vector4_f32[0] = powf(V1.vector4_f32[0], V2.vector4_f32[0]);
3096    Result.vector4_f32[1] = powf(V1.vector4_f32[1], V2.vector4_f32[1]);
3097    Result.vector4_f32[2] = powf(V1.vector4_f32[2], V2.vector4_f32[2]);
3098    Result.vector4_f32[3] = powf(V1.vector4_f32[3], V2.vector4_f32[3]);
3099
3100    return Result;
3101
3102#elif defined(_XM_SSE_INTRINSICS_)
3103    XMVECTOR vResult = _mm_setr_ps(
3104        powf(XMVectorGetX(V1),XMVectorGetX(V2)),
3105        powf(XMVectorGetY(V1),XMVectorGetY(V2)),
3106        powf(XMVectorGetZ(V1),XMVectorGetZ(V2)),
3107        powf(XMVectorGetW(V1),XMVectorGetW(V2)));
3108    return vResult;
3109#else // _XM_VMX128_INTRINSICS_
3110#endif // _XM_VMX128_INTRINSICS_
3111}
3112
3113//------------------------------------------------------------------------------
3114
3115XMFINLINE XMVECTOR XMVectorPow
3116(
3117    FXMVECTOR V1,
3118    FXMVECTOR V2
3119)
3120{
3121#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)
3122
3123    return XMVectorPowEst(V1, V2);
3124
3125#else // _XM_VMX128_INTRINSICS_
3126#endif // _XM_VMX128_INTRINSICS_
3127}
3128
3129//------------------------------------------------------------------------------
3130
3131XMFINLINE XMVECTOR XMVectorAbs
3132(
3133    FXMVECTOR V
3134)
3135{
3136#if defined(_XM_NO_INTRINSICS_)
3137    XMVECTOR vResult = {
3138        fabsf(V.vector4_f32[0]),
3139        fabsf(V.vector4_f32[1]),
3140        fabsf(V.vector4_f32[2]),
3141        fabsf(V.vector4_f32[3])
3142    };
3143    return vResult;
3144
3145#elif defined(_XM_SSE_INTRINSICS_)
3146	XMVECTOR vResult = _mm_setzero_ps();
3147	vResult = _mm_sub_ps(vResult,V);
3148	vResult = _mm_max_ps(vResult,V);
3149    return vResult;
3150#else // _XM_VMX128_INTRINSICS_
3151#endif // _XM_VMX128_INTRINSICS_
3152}
3153
3154//------------------------------------------------------------------------------
3155
3156XMFINLINE XMVECTOR XMVectorMod
3157(
3158    FXMVECTOR V1,
3159    FXMVECTOR V2
3160)
3161{
3162#if defined(_XM_NO_INTRINSICS_)
3163
3164    XMVECTOR Reciprocal;
3165    XMVECTOR Quotient;
3166    XMVECTOR Result;
3167
3168    // V1 % V2 = V1 - V2 * truncate(V1 / V2)
3169    Reciprocal = XMVectorReciprocal(V2);
3170    Quotient = XMVectorMultiply(V1, Reciprocal);
3171    Quotient = XMVectorTruncate(Quotient);
3172    Result = XMVectorNegativeMultiplySubtract(V2, Quotient, V1);
3173
3174    return Result;
3175
3176#elif defined(_XM_SSE_INTRINSICS_)
3177    XMVECTOR vResult = _mm_div_ps(V1, V2);
3178    vResult = XMVectorTruncate(vResult);
3179    vResult = _mm_mul_ps(vResult,V2);
3180    vResult = _mm_sub_ps(V1,vResult);
3181    return vResult;
3182#else // _XM_VMX128_INTRINSICS_
3183#endif // _XM_VMX128_INTRINSICS_
3184}
3185
3186//------------------------------------------------------------------------------
3187
3188XMFINLINE XMVECTOR XMVectorModAngles
3189(
3190    FXMVECTOR Angles
3191)
3192{
3193#if defined(_XM_NO_INTRINSICS_)
3194
3195    XMVECTOR V;
3196    XMVECTOR Result;
3197
3198    // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI
3199    V = XMVectorMultiply(Angles, g_XMReciprocalTwoPi.v);
3200    V = XMVectorRound(V);
3201    Result = XMVectorNegativeMultiplySubtract(g_XMTwoPi.v, V, Angles);
3202
3203    return Result;
3204
3205#elif defined(_XM_SSE_INTRINSICS_)
3206    // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI
3207    XMVECTOR vResult = _mm_mul_ps(Angles,g_XMReciprocalTwoPi);
3208    // Use the inline function due to complexity for rounding
3209    vResult = XMVectorRound(vResult);
3210    vResult = _mm_mul_ps(vResult,g_XMTwoPi);
3211    vResult = _mm_sub_ps(Angles,vResult);
3212    return vResult;
3213#else // _XM_VMX128_INTRINSICS_
3214#endif // _XM_VMX128_INTRINSICS_
3215}
3216
3217//------------------------------------------------------------------------------
3218
3219XMINLINE XMVECTOR XMVectorSin
3220(
3221    FXMVECTOR V
3222)
3223{
3224
3225#if defined(_XM_NO_INTRINSICS_)
3226
3227    XMVECTOR V1, V2, V3, V5, V7, V9, V11, V13, V15, V17, V19, V21, V23;
3228    XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
3229    XMVECTOR Result;
3230
3231    V1 = XMVectorModAngles(V);
3232
3233    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
3234    //           V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
3235    V2  = XMVectorMultiply(V1, V1);
3236    V3  = XMVectorMultiply(V2, V1);
3237    V5  = XMVectorMultiply(V3, V2);
3238    V7  = XMVectorMultiply(V5, V2);
3239    V9  = XMVectorMultiply(V7, V2);
3240    V11 = XMVectorMultiply(V9, V2);
3241    V13 = XMVectorMultiply(V11, V2);
3242    V15 = XMVectorMultiply(V13, V2);
3243    V17 = XMVectorMultiply(V15, V2);
3244    V19 = XMVectorMultiply(V17, V2);
3245    V21 = XMVectorMultiply(V19, V2);
3246    V23 = XMVectorMultiply(V21, V2);
3247
3248    S1  = XMVectorSplatY(g_XMSinCoefficients0.v);
3249    S2  = XMVectorSplatZ(g_XMSinCoefficients0.v);
3250    S3  = XMVectorSplatW(g_XMSinCoefficients0.v);
3251    S4  = XMVectorSplatX(g_XMSinCoefficients1.v);
3252    S5  = XMVectorSplatY(g_XMSinCoefficients1.v);
3253    S6  = XMVectorSplatZ(g_XMSinCoefficients1.v);
3254    S7  = XMVectorSplatW(g_XMSinCoefficients1.v);
3255    S8  = XMVectorSplatX(g_XMSinCoefficients2.v);
3256    S9  = XMVectorSplatY(g_XMSinCoefficients2.v);
3257    S10 = XMVectorSplatZ(g_XMSinCoefficients2.v);
3258    S11 = XMVectorSplatW(g_XMSinCoefficients2.v);
3259
3260    Result = XMVectorMultiplyAdd(S1, V3, V1);
3261    Result = XMVectorMultiplyAdd(S2, V5, Result);
3262    Result = XMVectorMultiplyAdd(S3, V7, Result);
3263    Result = XMVectorMultiplyAdd(S4, V9, Result);
3264    Result = XMVectorMultiplyAdd(S5, V11, Result);
3265    Result = XMVectorMultiplyAdd(S6, V13, Result);
3266    Result = XMVectorMultiplyAdd(S7, V15, Result);
3267    Result = XMVectorMultiplyAdd(S8, V17, Result);
3268    Result = XMVectorMultiplyAdd(S9, V19, Result);
3269    Result = XMVectorMultiplyAdd(S10, V21, Result);
3270    Result = XMVectorMultiplyAdd(S11, V23, Result);
3271
3272    return Result;
3273
3274#elif defined(_XM_SSE_INTRINSICS_)
3275    // Force the value within the bounds of pi
3276    XMVECTOR vResult = XMVectorModAngles(V);
3277    // Each on is V to the "num" power
3278    // V2 = V1^2
3279    XMVECTOR V2  = _mm_mul_ps(vResult,vResult);
3280    // V1^3
3281    XMVECTOR vPower = _mm_mul_ps(vResult,V2);
3282    XMVECTOR vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[1]);
3283    vConstants = _mm_mul_ps(vConstants,vPower);
3284    vResult = _mm_add_ps(vResult,vConstants);
3285
3286    // V^5
3287    vPower = _mm_mul_ps(vPower,V2);
3288    vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[2]);
3289    vConstants = _mm_mul_ps(vConstants,vPower);
3290    vResult = _mm_add_ps(vResult,vConstants);
3291
3292    // V^7
3293    vPower = _mm_mul_ps(vPower,V2);
3294    vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[3]);
3295    vConstants = _mm_mul_ps(vConstants,vPower);
3296    vResult = _mm_add_ps(vResult,vConstants);
3297
3298    // V^9
3299    vPower = _mm_mul_ps(vPower,V2);
3300    vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[0]);
3301    vConstants = _mm_mul_ps(vConstants,vPower);
3302    vResult = _mm_add_ps(vResult,vConstants);
3303
3304    // V^11
3305    vPower = _mm_mul_ps(vPower,V2);
3306    vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[1]);
3307    vConstants = _mm_mul_ps(vConstants,vPower);
3308    vResult = _mm_add_ps(vResult,vConstants);
3309
3310    // V^13
3311    vPower = _mm_mul_ps(vPower,V2);
3312    vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[2]);
3313    vConstants = _mm_mul_ps(vConstants,vPower);
3314    vResult = _mm_add_ps(vResult,vConstants);
3315
3316    // V^15
3317    vPower = _mm_mul_ps(vPower,V2);
3318    vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[3]);
3319    vConstants = _mm_mul_ps(vConstants,vPower);
3320    vResult = _mm_add_ps(vResult,vConstants);
3321
3322    // V^17
3323    vPower = _mm_mul_ps(vPower,V2);
3324    vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[0]);
3325    vConstants = _mm_mul_ps(vConstants,vPower);
3326    vResult = _mm_add_ps(vResult,vConstants);
3327
3328    // V^19
3329    vPower = _mm_mul_ps(vPower,V2);
3330    vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[1]);
3331    vConstants = _mm_mul_ps(vConstants,vPower);
3332    vResult = _mm_add_ps(vResult,vConstants);
3333
3334    // V^21
3335    vPower = _mm_mul_ps(vPower,V2);
3336    vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[2]);
3337    vConstants = _mm_mul_ps(vConstants,vPower);
3338    vResult = _mm_add_ps(vResult,vConstants);
3339
3340    // V^23
3341    vPower = _mm_mul_ps(vPower,V2);
3342    vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[3]);
3343    vConstants = _mm_mul_ps(vConstants,vPower);
3344    vResult = _mm_add_ps(vResult,vConstants);
3345    return vResult;
3346#else // _XM_VMX128_INTRINSICS_
3347#endif // _XM_VMX128_INTRINSICS_
3348}
3349
3350//------------------------------------------------------------------------------
3351
3352XMINLINE XMVECTOR XMVectorCos
3353(
3354    FXMVECTOR V
3355)
3356{
3357#if defined(_XM_NO_INTRINSICS_)
3358
3359    XMVECTOR V1, V2, V4, V6, V8, V10, V12, V14, V16, V18, V20, V22;
3360    XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
3361    XMVECTOR Result;
3362
3363    V1 = XMVectorModAngles(V);
3364
3365    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
3366    //           V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
3367    V2 = XMVectorMultiply(V1, V1);
3368    V4 = XMVectorMultiply(V2, V2);
3369    V6 = XMVectorMultiply(V4, V2);
3370    V8 = XMVectorMultiply(V4, V4);
3371    V10 = XMVectorMultiply(V6, V4);
3372    V12 = XMVectorMultiply(V6, V6);
3373    V14 = XMVectorMultiply(V8, V6);
3374    V16 = XMVectorMultiply(V8, V8);
3375    V18 = XMVectorMultiply(V10, V8);
3376    V20 = XMVectorMultiply(V10, V10);
3377    V22 = XMVectorMultiply(V12, V10);
3378
3379    C1 = XMVectorSplatY(g_XMCosCoefficients0.v);
3380    C2 = XMVectorSplatZ(g_XMCosCoefficients0.v);
3381    C3 = XMVectorSplatW(g_XMCosCoefficients0.v);
3382    C4 = XMVectorSplatX(g_XMCosCoefficients1.v);
3383    C5 = XMVectorSplatY(g_XMCosCoefficients1.v);
3384    C6 = XMVectorSplatZ(g_XMCosCoefficients1.v);
3385    C7 = XMVectorSplatW(g_XMCosCoefficients1.v);
3386    C8 = XMVectorSplatX(g_XMCosCoefficients2.v);
3387    C9 = XMVectorSplatY(g_XMCosCoefficients2.v);
3388    C10 = XMVectorSplatZ(g_XMCosCoefficients2.v);
3389    C11 = XMVectorSplatW(g_XMCosCoefficients2.v);
3390
3391    Result = XMVectorMultiplyAdd(C1, V2, g_XMOne.v);
3392    Result = XMVectorMultiplyAdd(C2, V4, Result);
3393    Result = XMVectorMultiplyAdd(C3, V6, Result);
3394    Result = XMVectorMultiplyAdd(C4, V8, Result);
3395    Result = XMVectorMultiplyAdd(C5, V10, Result);
3396    Result = XMVectorMultiplyAdd(C6, V12, Result);
3397    Result = XMVectorMultiplyAdd(C7, V14, Result);
3398    Result = XMVectorMultiplyAdd(C8, V16, Result);
3399    Result = XMVectorMultiplyAdd(C9, V18, Result);
3400    Result = XMVectorMultiplyAdd(C10, V20, Result);
3401    Result = XMVectorMultiplyAdd(C11, V22, Result);
3402
3403    return Result;
3404
3405#elif defined(_XM_SSE_INTRINSICS_)
3406    // Force the value within the bounds of pi
3407    XMVECTOR V2 = XMVectorModAngles(V);
3408    // Each on is V to the "num" power
3409    // V2 = V1^2
3410    V2  = _mm_mul_ps(V2,V2);
3411    // V^2
3412    XMVECTOR vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[1]);
3413    vConstants = _mm_mul_ps(vConstants,V2);
3414    XMVECTOR vResult = _mm_add_ps(vConstants,g_XMOne);
3415
3416    // V^4
3417    XMVECTOR vPower = _mm_mul_ps(V2,V2);
3418    vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[2]);
3419    vConstants = _mm_mul_ps(vConstants,vPower);
3420    vResult = _mm_add_ps(vResult,vConstants);
3421
3422    // V^6
3423    vPower = _mm_mul_ps(vPower,V2);
3424    vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[3]);
3425    vConstants = _mm_mul_ps(vConstants,vPower);
3426    vResult = _mm_add_ps(vResult,vConstants);
3427
3428    // V^8
3429    vPower = _mm_mul_ps(vPower,V2);
3430    vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[0]);
3431    vConstants = _mm_mul_ps(vConstants,vPower);
3432    vResult = _mm_add_ps(vResult,vConstants);
3433
3434    // V^10
3435    vPower = _mm_mul_ps(vPower,V2);
3436    vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[1]);
3437    vConstants = _mm_mul_ps(vConstants,vPower);
3438    vResult = _mm_add_ps(vResult,vConstants);
3439
3440    // V^12
3441    vPower = _mm_mul_ps(vPower,V2);
3442    vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[2]);
3443    vConstants = _mm_mul_ps(vConstants,vPower);
3444    vResult = _mm_add_ps(vResult,vConstants);
3445
3446    // V^14
3447    vPower = _mm_mul_ps(vPower,V2);
3448    vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[3]);
3449    vConstants = _mm_mul_ps(vConstants,vPower);
3450    vResult = _mm_add_ps(vResult,vConstants);
3451
3452    // V^16
3453    vPower = _mm_mul_ps(vPower,V2);
3454    vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[0]);
3455    vConstants = _mm_mul_ps(vConstants,vPower);
3456    vResult = _mm_add_ps(vResult,vConstants);
3457
3458    // V^18
3459    vPower = _mm_mul_ps(vPower,V2);
3460    vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[1]);
3461    vConstants = _mm_mul_ps(vConstants,vPower);
3462    vResult = _mm_add_ps(vResult,vConstants);
3463
3464    // V^20
3465    vPower = _mm_mul_ps(vPower,V2);
3466    vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[2]);
3467    vConstants = _mm_mul_ps(vConstants,vPower);
3468    vResult = _mm_add_ps(vResult,vConstants);
3469
3470    // V^22
3471    vPower = _mm_mul_ps(vPower,V2);
3472    vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[3]);
3473    vConstants = _mm_mul_ps(vConstants,vPower);
3474    vResult = _mm_add_ps(vResult,vConstants);
3475    return vResult;
3476#else // _XM_VMX128_INTRINSICS_
3477#endif // _XM_VMX128_INTRINSICS_
3478}
3479
3480//------------------------------------------------------------------------------
3481
3482XMINLINE VOID XMVectorSinCos
3483(
3484    XMVECTOR* pSin,
3485    XMVECTOR* pCos,
3486    FXMVECTOR  V
3487)
3488{
3489#if defined(_XM_NO_INTRINSICS_)
3490
3491    XMVECTOR V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13;
3492    XMVECTOR V14, V15, V16, V17, V18, V19, V20, V21, V22, V23;
3493    XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
3494    XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
3495    XMVECTOR Sin, Cos;
3496
3497    XMASSERT(pSin);
3498    XMASSERT(pCos);
3499
3500    V1 = XMVectorModAngles(V);
3501
3502    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
3503    //           V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
3504    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
3505    //           V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
3506
3507    V2 = XMVectorMultiply(V1, V1);
3508    V3 = XMVectorMultiply(V2, V1);
3509    V4 = XMVectorMultiply(V2, V2);
3510    V5 = XMVectorMultiply(V3, V2);
3511    V6 = XMVectorMultiply(V3, V3);
3512    V7 = XMVectorMultiply(V4, V3);
3513    V8 = XMVectorMultiply(V4, V4);
3514    V9 = XMVectorMultiply(V5, V4);
3515    V10 = XMVectorMultiply(V5, V5);
3516    V11 = XMVectorMultiply(V6, V5);
3517    V12 = XMVectorMultiply(V6, V6);
3518    V13 = XMVectorMultiply(V7, V6);
3519    V14 = XMVectorMultiply(V7, V7);
3520    V15 = XMVectorMultiply(V8, V7);
3521    V16 = XMVectorMultiply(V8, V8);
3522    V17 = XMVectorMultiply(V9, V8);
3523    V18 = XMVectorMultiply(V9, V9);
3524    V19 = XMVectorMultiply(V10, V9);
3525    V20 = XMVectorMultiply(V10, V10);
3526    V21 = XMVectorMultiply(V11, V10);
3527    V22 = XMVectorMultiply(V11, V11);
3528    V23 = XMVectorMultiply(V12, V11);
3529
3530    S1  = XMVectorSplatY(g_XMSinCoefficients0.v);
3531    S2  = XMVectorSplatZ(g_XMSinCoefficients0.v);
3532    S3  = XMVectorSplatW(g_XMSinCoefficients0.v);
3533    S4  = XMVectorSplatX(g_XMSinCoefficients1.v);
3534    S5  = XMVectorSplatY(g_XMSinCoefficients1.v);
3535    S6  = XMVectorSplatZ(g_XMSinCoefficients1.v);
3536    S7  = XMVectorSplatW(g_XMSinCoefficients1.v);
3537    S8  = XMVectorSplatX(g_XMSinCoefficients2.v);
3538    S9  = XMVectorSplatY(g_XMSinCoefficients2.v);
3539    S10  = XMVectorSplatZ(g_XMSinCoefficients2.v);
3540    S11  = XMVectorSplatW(g_XMSinCoefficients2.v);
3541
3542    C1 = XMVectorSplatY(g_XMCosCoefficients0.v);
3543    C2 = XMVectorSplatZ(g_XMCosCoefficients0.v);
3544    C3 = XMVectorSplatW(g_XMCosCoefficients0.v);
3545    C4 = XMVectorSplatX(g_XMCosCoefficients1.v);
3546    C5 = XMVectorSplatY(g_XMCosCoefficients1.v);
3547    C6 = XMVectorSplatZ(g_XMCosCoefficients1.v);
3548    C7 = XMVectorSplatW(g_XMCosCoefficients1.v);
3549    C8 = XMVectorSplatX(g_XMCosCoefficients2.v);
3550    C9 = XMVectorSplatY(g_XMCosCoefficients2.v);
3551    C10 = XMVectorSplatZ(g_XMCosCoefficients2.v);
3552    C11 = XMVectorSplatW(g_XMCosCoefficients2.v);
3553
3554    Sin = XMVectorMultiplyAdd(S1, V3, V1);
3555    Sin = XMVectorMultiplyAdd(S2, V5, Sin);
3556    Sin = XMVectorMultiplyAdd(S3, V7, Sin);
3557    Sin = XMVectorMultiplyAdd(S4, V9, Sin);
3558    Sin = XMVectorMultiplyAdd(S5, V11, Sin);
3559    Sin = XMVectorMultiplyAdd(S6, V13, Sin);
3560    Sin = XMVectorMultiplyAdd(S7, V15, Sin);
3561    Sin = XMVectorMultiplyAdd(S8, V17, Sin);
3562    Sin = XMVectorMultiplyAdd(S9, V19, Sin);
3563    Sin = XMVectorMultiplyAdd(S10, V21, Sin);
3564    Sin = XMVectorMultiplyAdd(S11, V23, Sin);
3565
3566    Cos = XMVectorMultiplyAdd(C1, V2, g_XMOne.v);
3567    Cos = XMVectorMultiplyAdd(C2, V4, Cos);
3568    Cos = XMVectorMultiplyAdd(C3, V6, Cos);
3569    Cos = XMVectorMultiplyAdd(C4, V8, Cos);
3570    Cos = XMVectorMultiplyAdd(C5, V10, Cos);
3571    Cos = XMVectorMultiplyAdd(C6, V12, Cos);
3572    Cos = XMVectorMultiplyAdd(C7, V14, Cos);
3573    Cos = XMVectorMultiplyAdd(C8, V16, Cos);
3574    Cos = XMVectorMultiplyAdd(C9, V18, Cos);
3575    Cos = XMVectorMultiplyAdd(C10, V20, Cos);
3576    Cos = XMVectorMultiplyAdd(C11, V22, Cos);
3577
3578    *pSin = Sin;
3579    *pCos = Cos;
3580
3581#elif defined(_XM_SSE_INTRINSICS_)
3582    XMASSERT(pSin);
3583    XMASSERT(pCos);
3584    XMVECTOR V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13;
3585    XMVECTOR V14, V15, V16, V17, V18, V19, V20, V21, V22, V23;
3586    XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
3587    XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
3588    XMVECTOR Sin, Cos;
3589
3590    V1 = XMVectorModAngles(V);
3591
3592    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
3593    //           V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
3594    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
3595    //           V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
3596
3597    V2 = XMVectorMultiply(V1, V1);
3598    V3 = XMVectorMultiply(V2, V1);
3599    V4 = XMVectorMultiply(V2, V2);
3600    V5 = XMVectorMultiply(V3, V2);
3601    V6 = XMVectorMultiply(V3, V3);
3602    V7 = XMVectorMultiply(V4, V3);
3603    V8 = XMVectorMultiply(V4, V4);
3604    V9 = XMVectorMultiply(V5, V4);
3605    V10 = XMVectorMultiply(V5, V5);
3606    V11 = XMVectorMultiply(V6, V5);
3607    V12 = XMVectorMultiply(V6, V6);
3608    V13 = XMVectorMultiply(V7, V6);
3609    V14 = XMVectorMultiply(V7, V7);
3610    V15 = XMVectorMultiply(V8, V7);
3611    V16 = XMVectorMultiply(V8, V8);
3612    V17 = XMVectorMultiply(V9, V8);
3613    V18 = XMVectorMultiply(V9, V9);
3614    V19 = XMVectorMultiply(V10, V9);
3615    V20 = XMVectorMultiply(V10, V10);
3616    V21 = XMVectorMultiply(V11, V10);
3617    V22 = XMVectorMultiply(V11, V11);
3618    V23 = XMVectorMultiply(V12, V11);
3619
3620    S1  = _mm_load_ps1(&g_XMSinCoefficients0.f[1]);
3621    S2  = _mm_load_ps1(&g_XMSinCoefficients0.f[2]);
3622    S3  = _mm_load_ps1(&g_XMSinCoefficients0.f[3]);
3623    S4  = _mm_load_ps1(&g_XMSinCoefficients1.f[0]);
3624    S5  = _mm_load_ps1(&g_XMSinCoefficients1.f[1]);
3625    S6  = _mm_load_ps1(&g_XMSinCoefficients1.f[2]);
3626    S7  = _mm_load_ps1(&g_XMSinCoefficients1.f[3]);
3627    S8  = _mm_load_ps1(&g_XMSinCoefficients2.f[0]);
3628    S9  = _mm_load_ps1(&g_XMSinCoefficients2.f[1]);
3629    S10  = _mm_load_ps1(&g_XMSinCoefficients2.f[2]);
3630    S11  = _mm_load_ps1(&g_XMSinCoefficients2.f[3]);
3631
3632    C1 = _mm_load_ps1(&g_XMCosCoefficients0.f[1]);
3633    C2 = _mm_load_ps1(&g_XMCosCoefficients0.f[2]);
3634    C3 = _mm_load_ps1(&g_XMCosCoefficients0.f[3]);
3635    C4 = _mm_load_ps1(&g_XMCosCoefficients1.f[0]);
3636    C5 = _mm_load_ps1(&g_XMCosCoefficients1.f[1]);
3637    C6 = _mm_load_ps1(&g_XMCosCoefficients1.f[2]);
3638    C7 = _mm_load_ps1(&g_XMCosCoefficients1.f[3]);
3639    C8 = _mm_load_ps1(&g_XMCosCoefficients2.f[0]);
3640    C9 = _mm_load_ps1(&g_XMCosCoefficients2.f[1]);
3641    C10 = _mm_load_ps1(&g_XMCosCoefficients2.f[2]);
3642    C11 = _mm_load_ps1(&g_XMCosCoefficients2.f[3]);
3643
3644    S1 = _mm_mul_ps(S1,V3);
3645    Sin = _mm_add_ps(S1,V1);
3646    Sin = XMVectorMultiplyAdd(S2, V5, Sin);
3647    Sin = XMVectorMultiplyAdd(S3, V7, Sin);
3648    Sin = XMVectorMultiplyAdd(S4, V9, Sin);
3649    Sin = XMVectorMultiplyAdd(S5, V11, Sin);
3650    Sin = XMVectorMultiplyAdd(S6, V13, Sin);
3651    Sin = XMVectorMultiplyAdd(S7, V15, Sin);
3652    Sin = XMVectorMultiplyAdd(S8, V17, Sin);
3653    Sin = XMVectorMultiplyAdd(S9, V19, Sin);
3654    Sin = XMVectorMultiplyAdd(S10, V21, Sin);
3655    Sin = XMVectorMultiplyAdd(S11, V23, Sin);
3656
3657    Cos = _mm_mul_ps(C1,V2);
3658    Cos = _mm_add_ps(Cos,g_XMOne);
3659    Cos = XMVectorMultiplyAdd(C2, V4, Cos);
3660    Cos = XMVectorMultiplyAdd(C3, V6, Cos);
3661    Cos = XMVectorMultiplyAdd(C4, V8, Cos);
3662    Cos = XMVectorMultiplyAdd(C5, V10, Cos);
3663    Cos = XMVectorMultiplyAdd(C6, V12, Cos);
3664    Cos = XMVectorMultiplyAdd(C7, V14, Cos);
3665    Cos = XMVectorMultiplyAdd(C8, V16, Cos);
3666    Cos = XMVectorMultiplyAdd(C9, V18, Cos);
3667    Cos = XMVectorMultiplyAdd(C10, V20, Cos);
3668    Cos = XMVectorMultiplyAdd(C11, V22, Cos);
3669
3670    *pSin = Sin;
3671    *pCos = Cos;
3672#else // _XM_VMX128_INTRINSICS_
3673#endif // _XM_VMX128_INTRINSICS_
3674}
3675
3676//------------------------------------------------------------------------------
3677
3678XMINLINE XMVECTOR XMVectorTan
3679(
3680    FXMVECTOR V
3681)
3682{
3683#if defined(_XM_NO_INTRINSICS_)
3684
3685    // Cody and Waite algorithm to compute tangent.
3686
3687    XMVECTOR VA, VB, VC, VC2;
3688    XMVECTOR T0, T1, T2, T3, T4, T5, T6, T7;
3689    XMVECTOR C0, C1, TwoDivPi, Epsilon;
3690    XMVECTOR N, D;
3691    XMVECTOR R0, R1;
3692    XMVECTOR VIsZero, VCNearZero, VBIsEven;
3693    XMVECTOR Zero;
3694    XMVECTOR Result;
3695    UINT     i;
3696    static CONST XMVECTOR TanCoefficients0 = {1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f};
3697    static CONST XMVECTOR TanCoefficients1 = {4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f};
3698    static CONST XMVECTOR TanConstants = {1.570796371f, 6.077100628e-11f, 0.000244140625f, 2.0f / XM_PI};
3699    static CONST XMVECTORU32 Mask = {0x1, 0x1, 0x1, 0x1};
3700
3701    TwoDivPi = XMVectorSplatW(TanConstants);
3702
3703    Zero = XMVectorZero();
3704
3705    C0 = XMVectorSplatX(TanConstants);
3706    C1 = XMVectorSplatY(TanConstants);
3707    Epsilon = XMVectorSplatZ(TanConstants);
3708
3709    VA = XMVectorMultiply(V, TwoDivPi);
3710
3711    VA = XMVectorRound(VA);
3712
3713    VC = XMVectorNegativeMultiplySubtract(VA, C0, V);
3714
3715    VB = XMVectorAbs(VA);
3716
3717    VC = XMVectorNegativeMultiplySubtract(VA, C1, VC);
3718
3719    for (i = 0; i < 4; i++)
3720    {
3721        VB.vector4_u32[i] = (UINT)VB.vector4_f32[i];
3722    }
3723
3724    VC2 = XMVectorMultiply(VC, VC);
3725
3726    T7 = XMVectorSplatW(TanCoefficients1);
3727    T6 = XMVectorSplatZ(TanCoefficients1);
3728    T4 = XMVectorSplatX(TanCoefficients1);
3729    T3 = XMVectorSplatW(TanCoefficients0);
3730    T5 = XMVectorSplatY(TanCoefficients1);
3731    T2 = XMVectorSplatZ(TanCoefficients0);
3732    T1 = XMVectorSplatY(TanCoefficients0);
3733    T0 = XMVectorSplatX(TanCoefficients0);
3734
3735    VBIsEven = XMVectorAndInt(VB, Mask.v);
3736    VBIsEven = XMVectorEqualInt(VBIsEven, Zero);
3737
3738    N = XMVectorMultiplyAdd(VC2, T7, T6);
3739    D = XMVectorMultiplyAdd(VC2, T4, T3);
3740    N = XMVectorMultiplyAdd(VC2, N, T5);
3741    D = XMVectorMultiplyAdd(VC2, D, T2);
3742    N = XMVectorMultiply(VC2, N);
3743    D = XMVectorMultiplyAdd(VC2, D, T1);
3744    N = XMVectorMultiplyAdd(VC, N, VC);
3745    VCNearZero = XMVectorInBounds(VC, Epsilon);
3746    D = XMVectorMultiplyAdd(VC2, D, T0);
3747
3748    N = XMVectorSelect(N, VC, VCNearZero);
3749    D = XMVectorSelect(D, g_XMOne.v, VCNearZero);
3750
3751    R0 = XMVectorNegate(N);
3752    R1 = XMVectorReciprocal(D);
3753    R0 = XMVectorReciprocal(R0);
3754    R1 = XMVectorMultiply(N, R1);
3755    R0 = XMVectorMultiply(D, R0);
3756
3757    VIsZero = XMVectorEqual(V, Zero);
3758
3759    Result = XMVectorSelect(R0, R1, VBIsEven);
3760
3761    Result = XMVectorSelect(Result, Zero, VIsZero);
3762
3763    return Result;
3764
3765#elif defined(_XM_SSE_INTRINSICS_)
3766    // Cody and Waite algorithm to compute tangent.
3767
3768    XMVECTOR VA, VB, VC, VC2;
3769    XMVECTOR T0, T1, T2, T3, T4, T5, T6, T7;
3770    XMVECTOR C0, C1, TwoDivPi, Epsilon;
3771    XMVECTOR N, D;
3772    XMVECTOR R0, R1;
3773    XMVECTOR VIsZero, VCNearZero, VBIsEven;
3774    XMVECTOR Zero;
3775    XMVECTOR Result;
3776    static CONST XMVECTORF32 TanCoefficients0 = {1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f};
3777    static CONST XMVECTORF32 TanCoefficients1 = {4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f};
3778    static CONST XMVECTORF32 TanConstants = {1.570796371f, 6.077100628e-11f, 0.000244140625f, 2.0f / XM_PI};
3779    static CONST XMVECTORI32 Mask = {0x1, 0x1, 0x1, 0x1};
3780
3781    TwoDivPi = XMVectorSplatW(TanConstants);
3782
3783    Zero = XMVectorZero();
3784
3785    C0 = XMVectorSplatX(TanConstants);
3786    C1 = XMVectorSplatY(TanConstants);
3787    Epsilon = XMVectorSplatZ(TanConstants);
3788
3789    VA = XMVectorMultiply(V, TwoDivPi);
3790
3791    VA = XMVectorRound(VA);
3792
3793    VC = XMVectorNegativeMultiplySubtract(VA, C0, V);
3794
3795    VB = XMVectorAbs(VA);
3796
3797    VC = XMVectorNegativeMultiplySubtract(VA, C1, VC);
3798
3799    reinterpret_cast<__m128i *>(&VB)[0] = _mm_cvttps_epi32(VB);
3800
3801    VC2 = XMVectorMultiply(VC, VC);
3802
3803    T7 = XMVectorSplatW(TanCoefficients1);
3804    T6 = XMVectorSplatZ(TanCoefficients1);
3805    T4 = XMVectorSplatX(TanCoefficients1);
3806    T3 = XMVectorSplatW(TanCoefficients0);
3807    T5 = XMVectorSplatY(TanCoefficients1);
3808    T2 = XMVectorSplatZ(TanCoefficients0);
3809    T1 = XMVectorSplatY(TanCoefficients0);
3810    T0 = XMVectorSplatX(TanCoefficients0);
3811
3812    VBIsEven = XMVectorAndInt(VB,Mask);
3813    VBIsEven = XMVectorEqualInt(VBIsEven, Zero);
3814
3815    N = XMVectorMultiplyAdd(VC2, T7, T6);
3816    D = XMVectorMultiplyAdd(VC2, T4, T3);
3817    N = XMVectorMultiplyAdd(VC2, N, T5);
3818    D = XMVectorMultiplyAdd(VC2, D, T2);
3819    N = XMVectorMultiply(VC2, N);
3820    D = XMVectorMultiplyAdd(VC2, D, T1);
3821    N = XMVectorMultiplyAdd(VC, N, VC);
3822    VCNearZero = XMVectorInBounds(VC, Epsilon);
3823    D = XMVectorMultiplyAdd(VC2, D, T0);
3824
3825    N = XMVectorSelect(N, VC, VCNearZero);
3826    D = XMVectorSelect(D, g_XMOne, VCNearZero);
3827    R0 = XMVectorNegate(N);
3828    R1 = _mm_div_ps(N,D);
3829    R0 = _mm_div_ps(D,R0);
3830    VIsZero = XMVectorEqual(V, Zero);
3831    Result = XMVectorSelect(R0, R1, VBIsEven);
3832    Result = XMVectorSelect(Result, Zero, VIsZero);
3833
3834    return Result;
3835
3836#else // _XM_VMX128_INTRINSICS_
3837#endif // _XM_VMX128_INTRINSICS_
3838}
3839
3840//------------------------------------------------------------------------------
3841
3842XMINLINE XMVECTOR XMVectorSinH
3843(
3844    FXMVECTOR V
3845)
3846{
3847#if defined(_XM_NO_INTRINSICS_)
3848
3849    XMVECTOR V1, V2;
3850    XMVECTOR E1, E2;
3851    XMVECTOR Result;
3852    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
3853
3854    V1 = XMVectorMultiplyAdd(V, Scale.v, g_XMNegativeOne.v);
3855    V2 = XMVectorNegativeMultiplySubtract(V, Scale.v, g_XMNegativeOne.v);
3856
3857    E1 = XMVectorExp(V1);
3858    E2 = XMVectorExp(V2);
3859
3860    Result = XMVectorSubtract(E1, E2);
3861
3862    return Result;
3863
3864#elif defined(_XM_SSE_INTRINSICS_)
3865    XMVECTOR V1, V2;
3866    XMVECTOR E1, E2;
3867    XMVECTOR Result;
3868    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
3869
3870    V1 = _mm_mul_ps(V, Scale);
3871    V1 = _mm_add_ps(V1,g_XMNegativeOne);
3872    V2 = _mm_mul_ps(V, Scale);
3873    V2 = _mm_sub_ps(g_XMNegativeOne,V2);
3874    E1 = XMVectorExp(V1);
3875    E2 = XMVectorExp(V2);
3876
3877    Result = _mm_sub_ps(E1, E2);
3878
3879    return Result;
3880#else // _XM_VMX128_INTRINSICS_
3881#endif // _XM_VMX128_INTRINSICS_
3882}
3883
3884//------------------------------------------------------------------------------
3885
3886XMINLINE XMVECTOR XMVectorCosH
3887(
3888    FXMVECTOR V
3889)
3890{
3891#if defined(_XM_NO_INTRINSICS_)
3892
3893    XMVECTOR V1, V2;
3894    XMVECTOR E1, E2;
3895    XMVECTOR Result;
3896    static CONST XMVECTOR Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
3897
3898    V1 = XMVectorMultiplyAdd(V, Scale, g_XMNegativeOne.v);
3899    V2 = XMVectorNegativeMultiplySubtract(V, Scale, g_XMNegativeOne.v);
3900
3901    E1 = XMVectorExp(V1);
3902    E2 = XMVectorExp(V2);
3903
3904    Result = XMVectorAdd(E1, E2);
3905
3906    return Result;
3907
3908#elif defined(_XM_SSE_INTRINSICS_)
3909    XMVECTOR V1, V2;
3910    XMVECTOR E1, E2;
3911    XMVECTOR Result;
3912    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
3913
3914    V1 = _mm_mul_ps(V,Scale);
3915    V1 = _mm_add_ps(V1,g_XMNegativeOne);
3916    V2 = _mm_mul_ps(V, Scale);
3917    V2 = _mm_sub_ps(g_XMNegativeOne,V2);
3918    E1 = XMVectorExp(V1);
3919    E2 = XMVectorExp(V2);
3920    Result = _mm_add_ps(E1, E2);
3921    return Result;
3922#else // _XM_VMX128_INTRINSICS_
3923#endif // _XM_VMX128_INTRINSICS_
3924}
3925
3926//------------------------------------------------------------------------------
3927
3928XMINLINE XMVECTOR XMVectorTanH
3929(
3930    FXMVECTOR V
3931)
3932{
3933#if defined(_XM_NO_INTRINSICS_)
3934
3935    XMVECTOR E;
3936    XMVECTOR Result;
3937    static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)
3938
3939    E = XMVectorMultiply(V, Scale.v);
3940    E = XMVectorExp(E);
3941    E = XMVectorMultiplyAdd(E, g_XMOneHalf.v, g_XMOneHalf.v);
3942    E = XMVectorReciprocal(E);
3943
3944    Result = XMVectorSubtract(g_XMOne.v, E);
3945
3946    return Result;
3947
3948#elif defined(_XM_SSE_INTRINSICS_)
3949    static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)
3950
3951    XMVECTOR E = _mm_mul_ps(V, Scale);
3952    E = XMVectorExp(E);
3953    E = _mm_mul_ps(E,g_XMOneHalf);
3954    E = _mm_add_ps(E,g_XMOneHalf);
3955    E = XMVectorReciprocal(E);
3956    E = _mm_sub_ps(g_XMOne, E);
3957    return E;
3958#else // _XM_VMX128_INTRINSICS_
3959#endif // _XM_VMX128_INTRINSICS_
3960}
3961
3962//------------------------------------------------------------------------------
3963
3964XMINLINE XMVECTOR XMVectorASin
3965(
3966    FXMVECTOR V
3967)
3968{
3969#if defined(_XM_NO_INTRINSICS_)
3970
3971    XMVECTOR V2, V3, AbsV;
3972    XMVECTOR C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
3973    XMVECTOR R0, R1, R2, R3, R4;
3974    XMVECTOR OneMinusAbsV;
3975    XMVECTOR Rsq;
3976    XMVECTOR Result;
3977    static CONST XMVECTOR OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
3978
3979    // asin(V) = V * (C0 + C1 * V + C2 * V^2 + C3 * V^3 + C4 * V^4 + C5 * V^5) + (1 - V) * rsq(1 - V) *
3980    //           V * (C6 + C7 * V + C8 * V^2 + C9 * V^3 + C10 * V^4 + C11 * V^5)
3981
3982    AbsV = XMVectorAbs(V);
3983
3984    V2 = XMVectorMultiply(V, V);
3985    V3 = XMVectorMultiply(V2, AbsV);
3986
3987    R4 = XMVectorNegativeMultiplySubtract(AbsV, V, V);
3988
3989    OneMinusAbsV = XMVectorSubtract(OnePlusEpsilon, AbsV);
3990    Rsq = XMVectorReciprocalSqrt(OneMinusAbsV);
3991
3992    C0 = XMVectorSplatX(g_XMASinCoefficients0.v);
3993    C1 = XMVectorSplatY(g_XMASinCoefficients0.v);
3994    C2 = XMVectorSplatZ(g_XMASinCoefficients0.v);
3995    C3 = XMVectorSplatW(g_XMASinCoefficients0.v);
3996
3997    C4 = XMVectorSplatX(g_XMASinCoefficients1.v);
3998    C5 = XMVectorSplatY(g_XMASinCoefficients1.v);
3999    C6 = XMVectorSplatZ(g_XMASinCoefficients1.v);
4000    C7 = XMVectorSplatW(g_XMASinCoefficients1.v);
4001
4002    C8 = XMVectorSplatX(g_XMASinCoefficients2.v);
4003    C9 = XMVectorSplatY(g_XMASinCoefficients2.v);
4004    C10 = XMVectorSplatZ(g_XMASinCoefficients2.v);
4005    C11 = XMVectorSplatW(g_XMASinCoefficients2.v);
4006
4007    R0 = XMVectorMultiplyAdd(C3, AbsV, C7);
4008    R1 = XMVectorMultiplyAdd(C1, AbsV, C5);
4009    R2 = XMVectorMultiplyAdd(C2, AbsV, C6);
4010    R3 = XMVectorMultiplyAdd(C0, AbsV, C4);
4011
4012    R0 = XMVectorMultiplyAdd(R0, AbsV, C11);
4013    R1 = XMVectorMultiplyAdd(R1, AbsV, C9);
4014    R2 = XMVectorMultiplyAdd(R2, AbsV, C10);
4015    R3 = XMVectorMultiplyAdd(R3, AbsV, C8);
4016
4017    R0 = XMVectorMultiplyAdd(R2, V3, R0);
4018    R1 = XMVectorMultiplyAdd(R3, V3, R1);
4019
4020    R0 = XMVectorMultiply(V, R0);
4021    R1 = XMVectorMultiply(R4, R1);
4022
4023    Result = XMVectorMultiplyAdd(R1, Rsq, R0);
4024
4025    return Result;
4026
4027#elif defined(_XM_SSE_INTRINSICS_)
4028    static CONST XMVECTORF32 OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
4029
4030    // asin(V) = V * (C0 + C1 * V + C2 * V^2 + C3 * V^3 + C4 * V^4 + C5 * V^5) + (1 - V) * rsq(1 - V) *
4031    //           V * (C6 + C7 * V + C8 * V^2 + C9 * V^3 + C10 * V^4 + C11 * V^5)
4032    // Get abs(V)
4033	XMVECTOR vAbsV = _mm_setzero_ps();
4034	vAbsV = _mm_sub_ps(vAbsV,V);
4035	vAbsV = _mm_max_ps(vAbsV,V);
4036
4037    XMVECTOR R0 = vAbsV;
4038    XMVECTOR vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[3]);
4039    R0 = _mm_mul_ps(R0,vConstants);
4040    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[3]);
4041    R0 = _mm_add_ps(R0,vConstants);
4042
4043    XMVECTOR R1 = vAbsV;
4044    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[1]);
4045    R1 = _mm_mul_ps(R1,vConstants);
4046    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[1]);
4047    R1 = _mm_add_ps(R1, vConstants);
4048
4049    XMVECTOR R2 = vAbsV;
4050    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[2]);
4051    R2 = _mm_mul_ps(R2,vConstants);
4052    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[2]);
4053    R2 = _mm_add_ps(R2, vConstants);
4054
4055    XMVECTOR R3 = vAbsV;
4056    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[0]);
4057    R3 = _mm_mul_ps(R3,vConstants);
4058    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[0]);
4059    R3 = _mm_add_ps(R3, vConstants);
4060
4061    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[3]);
4062    R0 = _mm_mul_ps(R0,vAbsV);
4063    R0 = _mm_add_ps(R0,vConstants);
4064
4065    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[1]);
4066    R1 = _mm_mul_ps(R1,vAbsV);
4067    R1 = _mm_add_ps(R1,vConstants);
4068
4069    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[2]);
4070    R2 = _mm_mul_ps(R2,vAbsV);
4071    R2 = _mm_add_ps(R2,vConstants);
4072
4073    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[0]);
4074    R3 = _mm_mul_ps(R3,vAbsV);
4075    R3 = _mm_add_ps(R3,vConstants);
4076
4077    // V3 = V^3
4078    vConstants = _mm_mul_ps(V,V);
4079    vConstants = _mm_mul_ps(vConstants, vAbsV);
4080    // Mul by V^3
4081    R2 = _mm_mul_ps(R2,vConstants);
4082    R3 = _mm_mul_ps(R3,vConstants);
4083    // Merge the results
4084    R0 = _mm_add_ps(R0,R2);
4085    R1 = _mm_add_ps(R1,R3);
4086
4087    R0 = _mm_mul_ps(R0,V);
4088    // vConstants = V-(V^2 retaining sign)
4089    vConstants = _mm_mul_ps(vAbsV, V);
4090    vConstants = _mm_sub_ps(V,vConstants);
4091    R1 = _mm_mul_ps(R1,vConstants);
4092    vConstants = _mm_sub_ps(OnePlusEpsilon,vAbsV);
4093    // Do NOT use rsqrt/mul. This needs the precision
4094    vConstants = _mm_sqrt_ps(vConstants);
4095    R1 = _mm_div_ps(R1,vConstants);
4096    R0 = _mm_add_ps(R0,R1);
4097    return R0;
4098#else // _XM_VMX128_INTRINSICS_
4099#endif // _XM_VMX128_INTRINSICS_
4100}
4101
4102//------------------------------------------------------------------------------
4103
4104XMINLINE XMVECTOR XMVectorACos
4105(
4106    FXMVECTOR V
4107)
4108{
4109#if defined(_XM_NO_INTRINSICS_)
4110
4111    XMVECTOR V2, V3, AbsV;
4112    XMVECTOR C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
4113    XMVECTOR R0, R1, R2, R3, R4;
4114    XMVECTOR OneMinusAbsV;
4115    XMVECTOR Rsq;
4116    XMVECTOR Result;
4117    static CONST XMVECTOR OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
4118
4119    // acos(V) = PI / 2 - asin(V)
4120
4121    AbsV = XMVectorAbs(V);
4122
4123    V2 = XMVectorMultiply(V, V);
4124    V3 = XMVectorMultiply(V2, AbsV);
4125
4126    R4 = XMVectorNegativeMultiplySubtract(AbsV, V, V);
4127
4128    OneMinusAbsV = XMVectorSubtract(OnePlusEpsilon, AbsV);
4129    Rsq = XMVectorReciprocalSqrt(OneMinusAbsV);
4130
4131    C0 = XMVectorSplatX(g_XMASinCoefficients0.v);
4132    C1 = XMVectorSplatY(g_XMASinCoefficients0.v);
4133    C2 = XMVectorSplatZ(g_XMASinCoefficients0.v);
4134    C3 = XMVectorSplatW(g_XMASinCoefficients0.v);
4135
4136    C4 = XMVectorSplatX(g_XMASinCoefficients1.v);
4137    C5 = XMVectorSplatY(g_XMASinCoefficients1.v);
4138    C6 = XMVectorSplatZ(g_XMASinCoefficients1.v);
4139    C7 = XMVectorSplatW(g_XMASinCoefficients1.v);
4140
4141    C8 = XMVectorSplatX(g_XMASinCoefficients2.v);
4142    C9 = XMVectorSplatY(g_XMASinCoefficients2.v);
4143    C10 = XMVectorSplatZ(g_XMASinCoefficients2.v);
4144    C11 = XMVectorSplatW(g_XMASinCoefficients2.v);
4145
4146    R0 = XMVectorMultiplyAdd(C3, AbsV, C7);
4147    R1 = XMVectorMultiplyAdd(C1, AbsV, C5);
4148    R2 = XMVectorMultiplyAdd(C2, AbsV, C6);
4149    R3 = XMVectorMultiplyAdd(C0, AbsV, C4);
4150
4151    R0 = XMVectorMultiplyAdd(R0, AbsV, C11);
4152    R1 = XMVectorMultiplyAdd(R1, AbsV, C9);
4153    R2 = XMVectorMultiplyAdd(R2, AbsV, C10);
4154    R3 = XMVectorMultiplyAdd(R3, AbsV, C8);
4155
4156    R0 = XMVectorMultiplyAdd(R2, V3, R0);
4157    R1 = XMVectorMultiplyAdd(R3, V3, R1);
4158
4159    R0 = XMVectorMultiply(V, R0);
4160    R1 = XMVectorMultiply(R4, R1);
4161
4162    Result = XMVectorMultiplyAdd(R1, Rsq, R0);
4163
4164    Result = XMVectorSubtract(g_XMHalfPi.v, Result);
4165
4166    return Result;
4167
4168#elif defined(_XM_SSE_INTRINSICS_)
4169    static CONST XMVECTORF32 OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
4170    // Uses only 6 registers for good code on x86 targets
4171    // acos(V) = PI / 2 - asin(V)
4172    // Get abs(V)
4173	XMVECTOR vAbsV = _mm_setzero_ps();
4174	vAbsV = _mm_sub_ps(vAbsV,V);
4175	vAbsV = _mm_max_ps(vAbsV,V);
4176    // Perform the series in precision groups to
4177    // retain precision across 20 bits. (3 bits of imprecision due to operations)
4178    XMVECTOR R0 = vAbsV;
4179    XMVECTOR vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[3]);
4180    R0 = _mm_mul_ps(R0,vConstants);
4181    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[3]);
4182    R0 = _mm_add_ps(R0,vConstants);
4183    R0 = _mm_mul_ps(R0,vAbsV);
4184    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[3]);
4185    R0 = _mm_add_ps(R0,vConstants);
4186
4187    XMVECTOR R1 = vAbsV;
4188    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[1]);
4189    R1 = _mm_mul_ps(R1,vConstants);
4190    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[1]);
4191    R1 = _mm_add_ps(R1,vConstants);
4192    R1 = _mm_mul_ps(R1, vAbsV);
4193    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[1]);
4194    R1 = _mm_add_ps(R1,vConstants);
4195
4196    XMVECTOR R2 = vAbsV;
4197    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[2]);
4198    R2 = _mm_mul_ps(R2,vConstants);
4199    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[2]);
4200    R2 = _mm_add_ps(R2,vConstants);
4201    R2 = _mm_mul_ps(R2, vAbsV);
4202    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[2]);
4203    R2 = _mm_add_ps(R2,vConstants);
4204
4205    XMVECTOR R3 = vAbsV;
4206    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[0]);
4207    R3 = _mm_mul_ps(R3,vConstants);
4208    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[0]);
4209    R3 = _mm_add_ps(R3,vConstants);
4210    R3 = _mm_mul_ps(R3, vAbsV);
4211    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[0]);
4212    R3 = _mm_add_ps(R3,vConstants);
4213
4214    // vConstants = V^3
4215    vConstants = _mm_mul_ps(V,V);
4216    vConstants = _mm_mul_ps(vConstants,vAbsV);
4217    R2 = _mm_mul_ps(R2,vConstants);
4218    R3 = _mm_mul_ps(R3,vConstants);
4219    // Add the pair of values together here to retain
4220    // as much precision as possible
4221    R0 = _mm_add_ps(R0,R2);
4222    R1 = _mm_add_ps(R1,R3);
4223
4224    R0 = _mm_mul_ps(R0,V);
4225    // vConstants = V-(V*abs(V))
4226    vConstants = _mm_mul_ps(V,vAbsV);
4227    vConstants = _mm_sub_ps(V,vConstants);
4228    R1 = _mm_mul_ps(R1,vConstants);
4229    // Episilon exists to allow 1.0 as an answer
4230    vConstants = _mm_sub_ps(OnePlusEpsilon, vAbsV);
4231    // Use sqrt instead of rsqrt for precision
4232    vConstants = _mm_sqrt_ps(vConstants);
4233    R1 = _mm_div_ps(R1,vConstants);
4234    R1 = _mm_add_ps(R1,R0);
4235    vConstants = _mm_sub_ps(g_XMHalfPi,R1);
4236    return vConstants;
4237#else // _XM_VMX128_INTRINSICS_
4238#endif // _XM_VMX128_INTRINSICS_
4239}
4240
4241//------------------------------------------------------------------------------
4242
4243XMINLINE XMVECTOR XMVectorATan
4244(
4245    FXMVECTOR V
4246)
4247{
4248#if defined(_XM_NO_INTRINSICS_)
4249
4250    // Cody and Waite algorithm to compute inverse tangent.
4251
4252    XMVECTOR N, D;
4253    XMVECTOR VF, G, ReciprocalF, AbsF, FA, FB;
4254    XMVECTOR Sqrt3, Sqrt3MinusOne, TwoMinusSqrt3;
4255    XMVECTOR HalfPi, OneThirdPi, OneSixthPi, Epsilon, MinV, MaxV;
4256    XMVECTOR Zero;
4257    XMVECTOR NegativeHalfPi;
4258    XMVECTOR Angle1, Angle2;
4259    XMVECTOR F_GT_One, F_GT_TwoMinusSqrt3, AbsF_LT_Epsilon, V_LT_Zero, V_GT_MaxV, V_LT_MinV;
4260    XMVECTOR NegativeResult, Result;
4261    XMVECTOR P0, P1, P2, P3, Q0, Q1, Q2, Q3;
4262    static CONST XMVECTOR ATanConstants0 = {-1.3688768894e+1f, -2.0505855195e+1f, -8.4946240351f, -8.3758299368e-1f};
4263    static CONST XMVECTOR ATanConstants1 = {4.1066306682e+1f, 8.6157349597e+1f, 5.9578436142e+1f, 1.5024001160e+1f};
4264    static CONST XMVECTOR ATanConstants2 = {1.732050808f, 7.320508076e-1f, 2.679491924e-1f, 0.000244140625f}; // <sqrt(3), sqrt(3) - 1, 2 - sqrt(3), Epsilon>
4265    static CONST XMVECTOR ATanConstants3 = {XM_PIDIV2, XM_PI / 3.0f, XM_PI / 6.0f, 8.507059173e+37f}; // <Pi / 2, Pi / 3, Pi / 6, MaxV>
4266
4267    Zero = XMVectorZero();
4268
4269    P0 = XMVectorSplatX(ATanConstants0);
4270    P1 = XMVectorSplatY(ATanConstants0);
4271    P2 = XMVectorSplatZ(ATanConstants0);
4272    P3 = XMVectorSplatW(ATanConstants0);
4273
4274    Q0 = XMVectorSplatX(ATanConstants1);
4275    Q1 = XMVectorSplatY(ATanConstants1);
4276    Q2 = XMVectorSplatZ(ATanConstants1);
4277    Q3 = XMVectorSplatW(ATanConstants1);
4278
4279    Sqrt3 = XMVectorSplatX(ATanConstants2);
4280    Sqrt3MinusOne = XMVectorSplatY(ATanConstants2);
4281    TwoMinusSqrt3 = XMVectorSplatZ(ATanConstants2);
4282    Epsilon = XMVectorSplatW(ATanConstants2);
4283
4284    HalfPi = XMVectorSplatX(ATanConstants3);
4285    OneThirdPi = XMVectorSplatY(ATanConstants3);
4286    OneSixthPi = XMVectorSplatZ(ATanConstants3);
4287    MaxV = XMVectorSplatW(ATanConstants3);
4288
4289    VF = XMVectorAbs(V);
4290    ReciprocalF = XMVectorReciprocal(VF);
4291
4292    F_GT_One = XMVectorGreater(VF, g_XMOne.v);
4293
4294    VF = XMVectorSelect(VF, ReciprocalF, F_GT_One);
4295    Angle1 = XMVectorSelect(Zero, HalfPi, F_GT_One);
4296    Angle2 = XMVectorSelect(OneSixthPi, OneThirdPi, F_GT_One);
4297
4298    F_GT_TwoMinusSqrt3 = XMVectorGreater(VF, TwoMinusSqrt3);
4299
4300    FA = XMVectorMultiplyAdd(Sqrt3MinusOne, VF, VF);
4301    FA = XMVectorAdd(FA, g_XMNegativeOne.v);
4302    FB = XMVectorAdd(VF, Sqrt3);
4303    FB = XMVectorReciprocal(FB);
4304    FA = XMVectorMultiply(FA, FB);
4305
4306    VF = XMVectorSelect(VF, FA, F_GT_TwoMinusSqrt3);
4307    Angle1 = XMVectorSelect(Angle1, Angle2, F_GT_TwoMinusSqrt3);
4308
4309    AbsF = XMVectorAbs(VF);
4310    AbsF_LT_Epsilon = XMVectorLess(AbsF, Epsilon);
4311
4312    G = XMVectorMultiply(VF, VF);
4313
4314    D = XMVectorAdd(G, Q3);
4315    D = XMVectorMultiplyAdd(D, G, Q2);
4316    D = XMVectorMultiplyAdd(D, G, Q1);
4317    D = XMVectorMultiplyAdd(D, G, Q0);
4318    D = XMVectorReciprocal(D);
4319
4320    N = XMVectorMultiplyAdd(P3, G, P2);
4321    N = XMVectorMultiplyAdd(N, G, P1);
4322    N = XMVectorMultiplyAdd(N, G, P0);
4323    N = XMVectorMultiply(N, G);
4324    Result = XMVectorMultiply(N, D);
4325
4326    Result = XMVectorMultiplyAdd(Result, VF, VF);
4327
4328    Result = XMVectorSelect(Result, VF, AbsF_LT_Epsilon);
4329
4330    NegativeResult = XMVectorNegate(Result);
4331    Result = XMVectorSelect(Result, NegativeResult, F_GT_One);
4332
4333    Result = XMVectorAdd(Result, Angle1);
4334
4335    V_LT_Zero = XMVectorLess(V, Zero);
4336    NegativeResult = XMVectorNegate(Result);
4337    Result = XMVectorSelect(Result, NegativeResult, V_LT_Zero);
4338
4339    MinV = XMVectorNegate(MaxV);
4340    NegativeHalfPi = XMVectorNegate(HalfPi);
4341    V_GT_MaxV = XMVectorGreater(V, MaxV);
4342    V_LT_MinV = XMVectorLess(V, MinV);
4343    Result = XMVectorSelect(Result, g_XMHalfPi.v, V_GT_MaxV);
4344    Result = XMVectorSelect(Result, NegativeHalfPi, V_LT_MinV);
4345
4346    return Result;
4347
4348#elif defined(_XM_SSE_INTRINSICS_)
4349    static CONST XMVECTORF32 ATanConstants0 = {-1.3688768894e+1f, -2.0505855195e+1f, -8.4946240351f, -8.3758299368e-1f};
4350    static CONST XMVECTORF32 ATanConstants1 = {4.1066306682e+1f, 8.6157349597e+1f, 5.9578436142e+1f, 1.5024001160e+1f};
4351    static CONST XMVECTORF32 ATanConstants2 = {1.732050808f, 7.320508076e-1f, 2.679491924e-1f, 0.000244140625f}; // <sqrt(3), sqrt(3) - 1, 2 - sqrt(3), Epsilon>
4352    static CONST XMVECTORF32 ATanConstants3 = {XM_PIDIV2, XM_PI / 3.0f, XM_PI / 6.0f, 8.507059173e+37f}; // <Pi / 2, Pi / 3, Pi / 6, MaxV>
4353
4354    XMVECTOR VF = XMVectorAbs(V);
4355    XMVECTOR F_GT_One = _mm_cmpgt_ps(VF,g_XMOne);
4356    XMVECTOR ReciprocalF = XMVectorReciprocal(VF);
4357    VF = XMVectorSelect(VF, ReciprocalF, F_GT_One);
4358    XMVECTOR Zero = XMVectorZero();
4359    XMVECTOR HalfPi = _mm_load_ps1(&ATanConstants3.f[0]);
4360    XMVECTOR Angle1 = XMVectorSelect(Zero, HalfPi, F_GT_One);
4361    // Pi/3
4362    XMVECTOR vConstants = _mm_load_ps1(&ATanConstants3.f[1]);
4363    // Pi/6
4364    XMVECTOR Angle2 = _mm_load_ps1(&ATanConstants3.f[2]);
4365    Angle2 = XMVectorSelect(Angle2, vConstants, F_GT_One);
4366
4367    // 1-sqrt(3)
4368    XMVECTOR FA = _mm_load_ps1(&ATanConstants2.f[1]);
4369    FA = _mm_mul_ps(FA,VF);
4370    FA = _mm_add_ps(FA,VF);
4371    FA = _mm_add_ps(FA,g_XMNegativeOne);
4372    // sqrt(3)
4373    vConstants = _mm_load_ps1(&ATanConstants2.f[0]);
4374    vConstants = _mm_add_ps(vConstants,VF);
4375    FA = _mm_div_ps(FA,vConstants);
4376
4377    // 2-sqrt(3)
4378    vConstants = _mm_load_ps1(&ATanConstants2.f[2]);
4379    // >2-sqrt(3)?
4380    vConstants = _mm_cmpgt_ps(VF,vConstants);
4381    VF = XMVectorSelect(VF, FA, vConstants);
4382    Angle1 = XMVectorSelect(Angle1, Angle2, vConstants);
4383
4384    XMVECTOR AbsF = XMVectorAbs(VF);
4385
4386    XMVECTOR G = _mm_mul_ps(VF,VF);
4387    XMVECTOR D = _mm_load_ps1(&ATanConstants1.f[3]);
4388    D = _mm_add_ps(D,G);
4389    D = _mm_mul_ps(D,G);
4390    vConstants = _mm_load_ps1(&ATanConstants1.f[2]);
4391    D = _mm_add_ps(D,vConstants);
4392    D = _mm_mul_ps(D,G);
4393    vConstants = _mm_load_ps1(&ATanConstants1.f[1]);
4394    D = _mm_add_ps(D,vConstants);
4395    D = _mm_mul_ps(D,G);
4396    vConstants = _mm_load_ps1(&ATanConstants1.f[0]);
4397    D = _mm_add_ps(D,vConstants);
4398
4399    XMVECTOR N = _mm_load_ps1(&ATanConstants0.f[3]);
4400    N = _mm_mul_ps(N,G);
4401    vConstants = _mm_load_ps1(&ATanConstants0.f[2]);
4402    N = _mm_add_ps(N,vConstants);
4403    N = _mm_mul_ps(N,G);
4404    vConstants = _mm_load_ps1(&ATanConstants0.f[1]);
4405    N = _mm_add_ps(N,vConstants);
4406    N = _mm_mul_ps(N,G);
4407    vConstants = _mm_load_ps1(&ATanConstants0.f[0]);
4408    N = _mm_add_ps(N,vConstants);
4409    N = _mm_mul_ps(N,G);
4410    XMVECTOR Result = _mm_div_ps(N,D);
4411
4412    Result = _mm_mul_ps(Result,VF);
4413    Result = _mm_add_ps(Result,VF);
4414    // Epsilon
4415    vConstants = _mm_load_ps1(&ATanConstants2.f[3]);
4416    vConstants = _mm_cmpge_ps(vConstants,AbsF);
4417    Result = XMVectorSelect(Result,VF,vConstants);
4418
4419    XMVECTOR NegativeResult = _mm_mul_ps(Result,g_XMNegativeOne);
4420    Result = XMVectorSelect(Result,NegativeResult,F_GT_One);
4421    Result = _mm_add_ps(Result,Angle1);
4422
4423    Zero = _mm_cmpge_ps(Zero,V);
4424    NegativeResult = _mm_mul_ps(Result,g_XMNegativeOne);
4425    Result = XMVectorSelect(Result,NegativeResult,Zero);
4426
4427    XMVECTOR MaxV = _mm_load_ps1(&ATanConstants3.f[3]);
4428    XMVECTOR MinV = _mm_mul_ps(MaxV,g_XMNegativeOne);
4429    // Negate HalfPi
4430    HalfPi = _mm_mul_ps(HalfPi,g_XMNegativeOne);
4431    MaxV = _mm_cmple_ps(MaxV,V);
4432    MinV = _mm_cmpge_ps(MinV,V);
4433    Result = XMVectorSelect(Result,g_XMHalfPi,MaxV);
4434    // HalfPi = -HalfPi
4435    Result = XMVectorSelect(Result,HalfPi,MinV);
4436    return Result;
4437#else // _XM_VMX128_INTRINSICS_
4438#endif // _XM_VMX128_INTRINSICS_
4439}
4440
4441//------------------------------------------------------------------------------
4442
4443XMINLINE XMVECTOR XMVectorATan2
4444(
4445    FXMVECTOR Y,
4446    FXMVECTOR X
4447)
4448{
4449#if defined(_XM_NO_INTRINSICS_)
4450
4451    // Return the inverse tangent of Y / X in the range of -Pi to Pi with the following exceptions:
4452
4453    //     Y == 0 and X is Negative         -> Pi with the sign of Y
4454    //     y == 0 and x is positive         -> 0 with the sign of y
4455    //     Y != 0 and X == 0                -> Pi / 2 with the sign of Y
4456    //     Y != 0 and X is Negative         -> atan(y/x) + (PI with the sign of Y)
4457    //     X == -Infinity and Finite Y      -> Pi with the sign of Y
4458    //     X == +Infinity and Finite Y      -> 0 with the sign of Y
4459    //     Y == Infinity and X is Finite    -> Pi / 2 with the sign of Y
4460    //     Y == Infinity and X == -Infinity -> 3Pi / 4 with the sign of Y
4461    //     Y == Infinity and X == +Infinity -> Pi / 4 with the sign of Y
4462
4463    XMVECTOR Reciprocal;
4464    XMVECTOR V;
4465    XMVECTOR YSign;
4466    XMVECTOR Pi, PiOverTwo, PiOverFour, ThreePiOverFour;
4467    XMVECTOR YEqualsZero, XEqualsZero, XIsPositive, YEqualsInfinity, XEqualsInfinity;
4468    XMVECTOR ATanResultValid;
4469    XMVECTOR R0, R1, R2, R3, R4, R5;
4470    XMVECTOR Zero;
4471    XMVECTOR Result;
4472    static CONST XMVECTOR ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
4473
4474    Zero = XMVectorZero();
4475    ATanResultValid = XMVectorTrueInt();
4476
4477    Pi = XMVectorSplatX(ATan2Constants);
4478    PiOverTwo = XMVectorSplatY(ATan2Constants);
4479    PiOverFour = XMVectorSplatZ(ATan2Constants);
4480    ThreePiOverFour = XMVectorSplatW(ATan2Constants);
4481
4482    YEqualsZero = XMVectorEqual(Y, Zero);
4483    XEqualsZero = XMVectorEqual(X, Zero);
4484    XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v);
4485    XIsPositive = XMVectorEqualInt(XIsPositive, Zero);
4486    YEqualsInfinity = XMVectorIsInfinite(Y);
4487    XEqualsInfinity = XMVectorIsInfinite(X);
4488
4489    YSign = XMVectorAndInt(Y, g_XMNegativeZero.v);
4490    Pi = XMVectorOrInt(Pi, YSign);
4491    PiOverTwo = XMVectorOrInt(PiOverTwo, YSign);
4492    PiOverFour = XMVectorOrInt(PiOverFour, YSign);
4493    ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign);
4494
4495    R1 = XMVectorSelect(Pi, YSign, XIsPositive);
4496    R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero);
4497    R3 = XMVectorSelect(R2, R1, YEqualsZero);
4498    R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
4499    R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity);
4500    Result = XMVectorSelect(R3, R5, YEqualsInfinity);
4501    ATanResultValid = XMVectorEqualInt(Result, ATanResultValid);
4502
4503    Reciprocal = XMVectorReciprocal(X);
4504    V = XMVectorMultiply(Y, Reciprocal);
4505    R0 = XMVectorATan(V);
4506
4507    R1 = XMVectorSelect( Pi, Zero, XIsPositive );
4508    R2 = XMVectorAdd(R0, R1);
4509
4510    Result = XMVectorSelect(Result, R2, ATanResultValid);
4511
4512    return Result;
4513
4514#elif defined(_XM_SSE_INTRINSICS_)
4515    static CONST XMVECTORF32 ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
4516
4517    // Mask if Y>0 && Y!=INF
4518    XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y);
4519    // Get the sign of (Y&0x80000000)
4520    XMVECTOR YSign = _mm_and_ps(Y, g_XMNegativeZero);
4521    // Get the sign bits of X
4522    XMVECTOR XIsPositive = _mm_and_ps(X,g_XMNegativeZero);
4523    // Change them to masks
4524    XIsPositive = XMVectorEqualInt(XIsPositive,g_XMZero);
4525    // Get Pi
4526    XMVECTOR Pi = _mm_load_ps1(&ATan2Constants.f[0]);
4527    // Copy the sign of Y
4528    Pi = _mm_or_ps(Pi,YSign);
4529    XMVECTOR R1 = XMVectorSelect(Pi,YSign,XIsPositive);
4530    // Mask for X==0
4531    XMVECTOR vConstants = _mm_cmpeq_ps(X,g_XMZero);
4532    // Get Pi/2 with with sign of Y
4533    XMVECTOR PiOverTwo = _mm_load_ps1(&ATan2Constants.f[1]);
4534    PiOverTwo = _mm_or_ps(PiOverTwo,YSign);
4535    XMVECTOR R2 = XMVectorSelect(g_XMNegOneMask,PiOverTwo,vConstants);
4536    // Mask for Y==0
4537    vConstants = _mm_cmpeq_ps(Y,g_XMZero);
4538    R2 = XMVectorSelect(R2,R1,vConstants);
4539    // Get Pi/4 with sign of Y
4540    XMVECTOR PiOverFour = _mm_load_ps1(&ATan2Constants.f[2]);
4541    PiOverFour = _mm_or_ps(PiOverFour,YSign);
4542    // Get (Pi*3)/4 with sign of Y
4543    XMVECTOR ThreePiOverFour = _mm_load_ps1(&ATan2Constants.f[3]);
4544    ThreePiOverFour = _mm_or_ps(ThreePiOverFour,YSign);
4545    vConstants = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
4546    XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X);
4547    vConstants = XMVectorSelect(PiOverTwo,vConstants,XEqualsInfinity);
4548
4549    XMVECTOR vResult = XMVectorSelect(R2,vConstants,YEqualsInfinity);
4550    vConstants = XMVectorSelect(R1,vResult,YEqualsInfinity);
4551    // At this point, any entry that's zero will get the result
4552    // from XMVectorATan(), otherwise, return the failsafe value
4553    vResult = XMVectorSelect(vResult,vConstants,XEqualsInfinity);
4554    // Any entries not 0xFFFFFFFF, are considered precalculated
4555    XMVECTOR ATanResultValid = XMVectorEqualInt(vResult,g_XMNegOneMask);
4556    // Let's do the ATan2 function
4557    vConstants = _mm_div_ps(Y,X);
4558    vConstants = XMVectorATan(vConstants);
4559    // Discard entries that have been declared void
4560
4561    XMVECTOR R3 = XMVectorSelect( Pi, g_XMZero, XIsPositive );
4562    vConstants = _mm_add_ps( vConstants, R3 );
4563
4564    vResult = XMVectorSelect(vResult,vConstants,ATanResultValid);
4565    return vResult;
4566#else // _XM_VMX128_INTRINSICS_
4567#endif // _XM_VMX128_INTRINSICS_
4568}
4569
4570//------------------------------------------------------------------------------
4571
4572XMFINLINE XMVECTOR XMVectorSinEst
4573(
4574    FXMVECTOR V
4575)
4576{
4577#if defined(_XM_NO_INTRINSICS_)
4578
4579    XMVECTOR V2, V3, V5, V7;
4580    XMVECTOR S1, S2, S3;
4581    XMVECTOR Result;
4582
4583    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
4584    V2 = XMVectorMultiply(V, V);
4585    V3 = XMVectorMultiply(V2, V);
4586    V5 = XMVectorMultiply(V3, V2);
4587    V7 = XMVectorMultiply(V5, V2);
4588
4589    S1 = XMVectorSplatY(g_XMSinEstCoefficients.v);
4590    S2 = XMVectorSplatZ(g_XMSinEstCoefficients.v);
4591    S3 = XMVectorSplatW(g_XMSinEstCoefficients.v);
4592
4593    Result = XMVectorMultiplyAdd(S1, V3, V);
4594    Result = XMVectorMultiplyAdd(S2, V5, Result);
4595    Result = XMVectorMultiplyAdd(S3, V7, Result);
4596
4597    return Result;
4598
4599#elif defined(_XM_SSE_INTRINSICS_)
4600    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
4601    XMVECTOR V2 = _mm_mul_ps(V,V);
4602    XMVECTOR V3 = _mm_mul_ps(V2,V);
4603    XMVECTOR vResult = _mm_load_ps1(&g_XMSinEstCoefficients.f[1]);
4604    vResult = _mm_mul_ps(vResult,V3);
4605    vResult = _mm_add_ps(vResult,V);
4606    XMVECTOR vConstants = _mm_load_ps1(&g_XMSinEstCoefficients.f[2]);
4607    // V^5
4608    V3 = _mm_mul_ps(V3,V2);
4609    vConstants = _mm_mul_ps(vConstants,V3);
4610    vResult = _mm_add_ps(vResult,vConstants);
4611    vConstants = _mm_load_ps1(&g_XMSinEstCoefficients.f[3]);
4612    // V^7
4613    V3 = _mm_mul_ps(V3,V2);
4614    vConstants = _mm_mul_ps(vConstants,V3);
4615    vResult = _mm_add_ps(vResult,vConstants);
4616    return vResult;
4617#else // _XM_VMX128_INTRINSICS_
4618#endif // _XM_VMX128_INTRINSICS_
4619}
4620
4621//------------------------------------------------------------------------------
4622
4623XMFINLINE XMVECTOR XMVectorCosEst
4624(
4625    FXMVECTOR V
4626)
4627{
4628#if defined(_XM_NO_INTRINSICS_)
4629
4630    XMVECTOR V2, V4, V6;
4631    XMVECTOR C0, C1, C2, C3;
4632    XMVECTOR Result;
4633
4634    V2 = XMVectorMultiply(V, V);
4635    V4 = XMVectorMultiply(V2, V2);
4636    V6 = XMVectorMultiply(V4, V2);
4637
4638    C0 = XMVectorSplatX(g_XMCosEstCoefficients.v);
4639    C1 = XMVectorSplatY(g_XMCosEstCoefficients.v);
4640    C2 = XMVectorSplatZ(g_XMCosEstCoefficients.v);
4641    C3 = XMVectorSplatW(g_XMCosEstCoefficients.v);
4642
4643    Result = XMVectorMultiplyAdd(C1, V2, C0);
4644    Result = XMVectorMultiplyAdd(C2, V4, Result);
4645    Result = XMVectorMultiplyAdd(C3, V6, Result);
4646
4647    return Result;
4648
4649#elif defined(_XM_SSE_INTRINSICS_)
4650    // Get V^2
4651    XMVECTOR V2 = _mm_mul_ps(V,V);
4652    XMVECTOR vResult = _mm_load_ps1(&g_XMCosEstCoefficients.f[1]);
4653    vResult = _mm_mul_ps(vResult,V2);
4654    XMVECTOR vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[0]);
4655    vResult = _mm_add_ps(vResult,vConstants);
4656    vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[2]);
4657    // Get V^4
4658    XMVECTOR V4 = _mm_mul_ps(V2, V2);
4659    vConstants = _mm_mul_ps(vConstants,V4);
4660    vResult = _mm_add_ps(vResult,vConstants);
4661    vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[3]);
4662    // It's really V^6
4663    V4 = _mm_mul_ps(V4,V2);
4664    vConstants = _mm_mul_ps(vConstants,V4);
4665    vResult = _mm_add_ps(vResult,vConstants);
4666    return vResult;
4667#else // _XM_VMX128_INTRINSICS_
4668#endif // _XM_VMX128_INTRINSICS_
4669}
4670
4671//------------------------------------------------------------------------------
4672
4673XMFINLINE VOID XMVectorSinCosEst
4674(
4675    XMVECTOR* pSin,
4676    XMVECTOR* pCos,
4677    FXMVECTOR  V
4678)
4679{
4680#if defined(_XM_NO_INTRINSICS_)
4681
4682    XMVECTOR V2, V3, V4, V5, V6, V7;
4683    XMVECTOR S1, S2, S3;
4684    XMVECTOR C0, C1, C2, C3;
4685    XMVECTOR Sin, Cos;
4686
4687    XMASSERT(pSin);
4688    XMASSERT(pCos);
4689
4690    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
4691    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
4692    V2 = XMVectorMultiply(V, V);
4693    V3 = XMVectorMultiply(V2, V);
4694    V4 = XMVectorMultiply(V2, V2);
4695    V5 = XMVectorMultiply(V3, V2);
4696    V6 = XMVectorMultiply(V3, V3);
4697    V7 = XMVectorMultiply(V4, V3);
4698
4699    S1 = XMVectorSplatY(g_XMSinEstCoefficients.v);
4700    S2 = XMVectorSplatZ(g_XMSinEstCoefficients.v);
4701    S3 = XMVectorSplatW(g_XMSinEstCoefficients.v);
4702
4703    C0 = XMVectorSplatX(g_XMCosEstCoefficients.v);
4704    C1 = XMVectorSplatY(g_XMCosEstCoefficients.v);
4705    C2 = XMVectorSplatZ(g_XMCosEstCoefficients.v);
4706    C3 = XMVectorSplatW(g_XMCosEstCoefficients.v);
4707
4708    Sin = XMVectorMultiplyAdd(S1, V3, V);
4709    Sin = XMVectorMultiplyAdd(S2, V5, Sin);
4710    Sin = XMVectorMultiplyAdd(S3, V7, Sin);
4711
4712    Cos = XMVectorMultiplyAdd(C1, V2, C0);
4713    Cos = XMVectorMultiplyAdd(C2, V4, Cos);
4714    Cos = XMVectorMultiplyAdd(C3, V6, Cos);
4715
4716    *pSin = Sin;
4717    *pCos = Cos;
4718
4719#elif defined(_XM_SSE_INTRINSICS_)
4720    XMASSERT(pSin);
4721    XMASSERT(pCos);
4722    XMVECTOR V2, V3, V4, V5, V6, V7;
4723    XMVECTOR S1, S2, S3;
4724    XMVECTOR C0, C1, C2, C3;
4725    XMVECTOR Sin, Cos;
4726
4727    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
4728    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
4729    V2 = XMVectorMultiply(V, V);
4730    V3 = XMVectorMultiply(V2, V);
4731    V4 = XMVectorMultiply(V2, V2);
4732    V5 = XMVectorMultiply(V3, V2);
4733    V6 = XMVectorMultiply(V3, V3);
4734    V7 = XMVectorMultiply(V4, V3);
4735
4736    S1 = _mm_load_ps1(&g_XMSinEstCoefficients.f[1]);
4737    S2 = _mm_load_ps1(&g_XMSinEstCoefficients.f[2]);
4738    S3 = _mm_load_ps1(&g_XMSinEstCoefficients.f[3]);
4739
4740    C0 = _mm_load_ps1(&g_XMCosEstCoefficients.f[0]);
4741    C1 = _mm_load_ps1(&g_XMCosEstCoefficients.f[1]);
4742    C2 = _mm_load_ps1(&g_XMCosEstCoefficients.f[2]);
4743    C3 = _mm_load_ps1(&g_XMCosEstCoefficients.f[3]);
4744
4745    Sin = XMVectorMultiplyAdd(S1, V3, V);
4746    Sin = XMVectorMultiplyAdd(S2, V5, Sin);
4747    Sin = XMVectorMultiplyAdd(S3, V7, Sin);
4748
4749    Cos = XMVectorMultiplyAdd(C1, V2, C0);
4750    Cos = XMVectorMultiplyAdd(C2, V4, Cos);
4751    Cos = XMVectorMultiplyAdd(C3, V6, Cos);
4752
4753    *pSin = Sin;
4754    *pCos = Cos;
4755#else // _XM_VMX128_INTRINSICS_
4756#endif // _XM_VMX128_INTRINSICS_
4757}
4758
4759//------------------------------------------------------------------------------
4760
4761XMFINLINE XMVECTOR XMVectorTanEst
4762(
4763    FXMVECTOR V
4764)
4765{
4766#if defined(_XM_NO_INTRINSICS_)
4767
4768    XMVECTOR V1, V2, V1T0, V1T1, V2T2;
4769    XMVECTOR T0, T1, T2;
4770    XMVECTOR N, D;
4771    XMVECTOR OneOverPi;
4772    XMVECTOR Result;
4773
4774    OneOverPi = XMVectorSplatW(g_XMTanEstCoefficients.v);
4775
4776    V1 = XMVectorMultiply(V, OneOverPi);
4777    V1 = XMVectorRound(V1);
4778
4779    V1 = XMVectorNegativeMultiplySubtract(g_XMPi.v, V1, V);
4780
4781    T0 = XMVectorSplatX(g_XMTanEstCoefficients.v);
4782    T1 = XMVectorSplatY(g_XMTanEstCoefficients.v);
4783    T2 = XMVectorSplatZ(g_XMTanEstCoefficients.v);
4784
4785    V2T2 = XMVectorNegativeMultiplySubtract(V1, V1, T2);
4786    V2 = XMVectorMultiply(V1, V1);
4787    V1T0 = XMVectorMultiply(V1, T0);
4788    V1T1 = XMVectorMultiply(V1, T1);
4789
4790    D = XMVectorReciprocalEst(V2T2);
4791    N = XMVectorMultiplyAdd(V2, V1T1, V1T0);
4792
4793    Result = XMVectorMultiply(N, D);
4794
4795    return Result;
4796
4797#elif defined(_XM_SSE_INTRINSICS_)
4798    XMVECTOR V1, V2, V1T0, V1T1, V2T2;
4799    XMVECTOR T0, T1, T2;
4800    XMVECTOR N, D;
4801    XMVECTOR OneOverPi;
4802    XMVECTOR Result;
4803
4804    OneOverPi = XMVectorSplatW(g_XMTanEstCoefficients);
4805
4806    V1 = XMVectorMultiply(V, OneOverPi);
4807    V1 = XMVectorRound(V1);
4808
4809    V1 = XMVectorNegativeMultiplySubtract(g_XMPi, V1, V);
4810
4811    T0 = XMVectorSplatX(g_XMTanEstCoefficients);
4812    T1 = XMVectorSplatY(g_XMTanEstCoefficients);
4813    T2 = XMVectorSplatZ(g_XMTanEstCoefficients);
4814
4815    V2T2 = XMVectorNegativeMultiplySubtract(V1, V1, T2);
4816    V2 = XMVectorMultiply(V1, V1);
4817    V1T0 = XMVectorMultiply(V1, T0);
4818    V1T1 = XMVectorMultiply(V1, T1);
4819
4820    D = XMVectorReciprocalEst(V2T2);
4821    N = XMVectorMultiplyAdd(V2, V1T1, V1T0);
4822
4823    Result = XMVectorMultiply(N, D);
4824
4825    return Result;
4826#else // _XM_VMX128_INTRINSICS_
4827#endif // _XM_VMX128_INTRINSICS_
4828}
4829
4830//------------------------------------------------------------------------------
4831
4832XMFINLINE XMVECTOR XMVectorSinHEst
4833(
4834    FXMVECTOR V
4835)
4836{
4837#if defined(_XM_NO_INTRINSICS_)
4838
4839    XMVECTOR V1, V2;
4840    XMVECTOR E1, E2;
4841    XMVECTOR Result;
4842    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
4843
4844    V1 = XMVectorMultiplyAdd(V, Scale.v, g_XMNegativeOne.v);
4845    V2 = XMVectorNegativeMultiplySubtract(V, Scale.v, g_XMNegativeOne.v);
4846
4847    E1 = XMVectorExpEst(V1);
4848    E2 = XMVectorExpEst(V2);
4849
4850    Result = XMVectorSubtract(E1, E2);
4851
4852    return Result;
4853
4854#elif defined(_XM_SSE_INTRINSICS_)
4855    XMVECTOR V1, V2;
4856    XMVECTOR E1, E2;
4857    XMVECTOR Result;
4858    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
4859
4860    V1 = _mm_mul_ps(V,Scale);
4861    V1 = _mm_add_ps(V1,g_XMNegativeOne);
4862    V2 = _mm_mul_ps(V,Scale);
4863    V2 = _mm_sub_ps(g_XMNegativeOne,V2);
4864    E1 = XMVectorExpEst(V1);
4865    E2 = XMVectorExpEst(V2);
4866    Result = _mm_sub_ps(E1, E2);
4867    return Result;
4868#else // _XM_VMX128_INTRINSICS_
4869#endif // _XM_VMX128_INTRINSICS_
4870}
4871
4872//------------------------------------------------------------------------------
4873
4874XMFINLINE XMVECTOR XMVectorCosHEst
4875(
4876    FXMVECTOR V
4877)
4878{
4879#if defined(_XM_NO_INTRINSICS_)
4880
4881    XMVECTOR V1, V2;
4882    XMVECTOR E1, E2;
4883    XMVECTOR Result;
4884    static CONST XMVECTOR Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
4885
4886    V1 = XMVectorMultiplyAdd(V, Scale, g_XMNegativeOne.v);
4887    V2 = XMVectorNegativeMultiplySubtract(V, Scale, g_XMNegativeOne.v);
4888
4889    E1 = XMVectorExpEst(V1);
4890    E2 = XMVectorExpEst(V2);
4891
4892    Result = XMVectorAdd(E1, E2);
4893
4894    return Result;
4895
4896#elif defined(_XM_SSE_INTRINSICS_)
4897    XMVECTOR V1, V2;
4898    XMVECTOR E1, E2;
4899    XMVECTOR Result;
4900    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
4901
4902    V1 = _mm_mul_ps(V,Scale);
4903    V1 = _mm_add_ps(V1,g_XMNegativeOne);
4904    V2 = _mm_mul_ps(V, Scale);
4905    V2 = _mm_sub_ps(g_XMNegativeOne,V2);
4906    E1 = XMVectorExpEst(V1);
4907    E2 = XMVectorExpEst(V2);
4908    Result = _mm_add_ps(E1, E2);
4909    return Result;
4910#else // _XM_VMX128_INTRINSICS_
4911#endif // _XM_VMX128_INTRINSICS_
4912}
4913
4914//------------------------------------------------------------------------------
4915
4916XMFINLINE XMVECTOR XMVectorTanHEst
4917(
4918    FXMVECTOR V
4919)
4920{
4921#if defined(_XM_NO_INTRINSICS_)
4922
4923    XMVECTOR E;
4924    XMVECTOR Result;
4925    static CONST XMVECTOR Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)
4926
4927    E = XMVectorMultiply(V, Scale);
4928    E = XMVectorExpEst(E);
4929    E = XMVectorMultiplyAdd(E, g_XMOneHalf.v, g_XMOneHalf.v);
4930    E = XMVectorReciprocalEst(E);
4931
4932    Result = XMVectorSubtract(g_XMOne.v, E);
4933
4934    return Result;
4935
4936#elif defined(_XM_SSE_INTRINSICS_)
4937    static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)
4938
4939    XMVECTOR E = _mm_mul_ps(V, Scale);
4940    E = XMVectorExpEst(E);
4941    E = _mm_mul_ps(E,g_XMOneHalf);
4942    E = _mm_add_ps(E,g_XMOneHalf);
4943    E = XMVectorReciprocalEst(E);
4944    E = _mm_sub_ps(g_XMOne, E);
4945    return E;
4946#else // _XM_VMX128_INTRINSICS_
4947#endif // _XM_VMX128_INTRINSICS_
4948}
4949
4950//------------------------------------------------------------------------------
4951
4952XMFINLINE XMVECTOR XMVectorASinEst
4953(
4954    FXMVECTOR V
4955)
4956{
4957#if defined(_XM_NO_INTRINSICS_)
4958
4959    XMVECTOR AbsV, V2, VD, VC0, V2C3;
4960    XMVECTOR C0, C1, C2, C3;
4961    XMVECTOR D, Rsq, SqrtD;
4962    XMVECTOR OnePlusEps;
4963    XMVECTOR Result;
4964
4965    AbsV = XMVectorAbs(V);
4966
4967    OnePlusEps = XMVectorSplatX(g_XMASinEstConstants.v);
4968
4969    C0 = XMVectorSplatX(g_XMASinEstCoefficients.v);
4970    C1 = XMVectorSplatY(g_XMASinEstCoefficients.v);
4971    C2 = XMVectorSplatZ(g_XMASinEstCoefficients.v);
4972    C3 = XMVectorSplatW(g_XMASinEstCoefficients.v);
4973
4974    D = XMVectorSubtract(OnePlusEps, AbsV);
4975
4976    Rsq = XMVectorReciprocalSqrtEst(D);
4977    SqrtD = XMVectorMultiply(D, Rsq);
4978
4979    V2 = XMVectorMultiply(V, AbsV);
4980    V2C3 = XMVectorMultiply(V2, C3);
4981    VD = XMVectorMultiply(D, AbsV);
4982    VC0 = XMVectorMultiply(V, C0);
4983
4984    Result = XMVectorMultiply(V, C1);
4985    Result = XMVectorMultiplyAdd(V2, C2, Result);
4986    Result = XMVectorMultiplyAdd(V2C3, VD, Result);
4987    Result = XMVectorMultiplyAdd(VC0, SqrtD, Result);
4988
4989    return Result;
4990
4991#elif defined(_XM_SSE_INTRINSICS_)
4992    // Get abs(V)
4993	XMVECTOR vAbsV = _mm_setzero_ps();
4994	vAbsV = _mm_sub_ps(vAbsV,V);
4995	vAbsV = _mm_max_ps(vAbsV,V);
4996
4997    XMVECTOR D = _mm_load_ps1(&g_XMASinEstConstants.f[0]);
4998    D = _mm_sub_ps(D,vAbsV);
4999    // Since this is an estimate, rqsrt is okay
5000    XMVECTOR vConstants = _mm_rsqrt_ps(D);
5001    XMVECTOR SqrtD = _mm_mul_ps(D,vConstants);
5002    // V2 = V^2 retaining sign
5003    XMVECTOR V2 = _mm_mul_ps(V,vAbsV);
5004    D = _mm_mul_ps(D,vAbsV);
5005
5006    XMVECTOR vResult = _mm_load_ps1(&g_XMASinEstCoefficients.f[1]);
5007    vResult = _mm_mul_ps(vResult,V);
5008    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[2]);
5009    vConstants = _mm_mul_ps(vConstants,V2);
5010    vResult = _mm_add_ps(vResult,vConstants);
5011
5012    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[3]);
5013    vConstants = _mm_mul_ps(vConstants,V2);
5014    vConstants = _mm_mul_ps(vConstants,D);
5015    vResult = _mm_add_ps(vResult,vConstants);
5016
5017    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[0]);
5018    vConstants = _mm_mul_ps(vConstants,V);
5019    vConstants = _mm_mul_ps(vConstants,SqrtD);
5020    vResult = _mm_add_ps(vResult,vConstants);
5021    return vResult;
5022#else // _XM_VMX128_INTRINSICS_
5023#endif // _XM_VMX128_INTRINSICS_
5024}
5025
5026//------------------------------------------------------------------------------
5027
5028XMFINLINE XMVECTOR XMVectorACosEst
5029(
5030    FXMVECTOR V
5031)
5032{
5033#if defined(_XM_NO_INTRINSICS_)
5034
5035    XMVECTOR AbsV, V2, VD, VC0, V2C3;
5036    XMVECTOR C0, C1, C2, C3;
5037    XMVECTOR D, Rsq, SqrtD;
5038    XMVECTOR OnePlusEps, HalfPi;
5039    XMVECTOR Result;
5040
5041    // acos(V) = PI / 2 - asin(V)
5042
5043    AbsV = XMVectorAbs(V);
5044
5045    OnePlusEps = XMVectorSplatX(g_XMASinEstConstants.v);
5046    HalfPi = XMVectorSplatY(g_XMASinEstConstants.v);
5047
5048    C0 = XMVectorSplatX(g_XMASinEstCoefficients.v);
5049    C1 = XMVectorSplatY(g_XMASinEstCoefficients.v);
5050    C2 = XMVectorSplatZ(g_XMASinEstCoefficients.v);
5051    C3 = XMVectorSplatW(g_XMASinEstCoefficients.v);
5052
5053    D = XMVectorSubtract(OnePlusEps, AbsV);
5054
5055    Rsq = XMVectorReciprocalSqrtEst(D);
5056    SqrtD = XMVectorMultiply(D, Rsq);
5057
5058    V2 = XMVectorMultiply(V, AbsV);
5059    V2C3 = XMVectorMultiply(V2, C3);
5060    VD = XMVectorMultiply(D, AbsV);
5061    VC0 = XMVectorMultiply(V, C0);
5062
5063    Result = XMVectorMultiply(V, C1);
5064    Result = XMVectorMultiplyAdd(V2, C2, Result);
5065    Result = XMVectorMultiplyAdd(V2C3, VD, Result);
5066    Result = XMVectorMultiplyAdd(VC0, SqrtD, Result);
5067    Result = XMVectorSubtract(HalfPi, Result);
5068
5069    return Result;
5070
5071#elif defined(_XM_SSE_INTRINSICS_)
5072    // acos(V) = PI / 2 - asin(V)
5073    // Get abs(V)
5074	XMVECTOR vAbsV = _mm_setzero_ps();
5075	vAbsV = _mm_sub_ps(vAbsV,V);
5076	vAbsV = _mm_max_ps(vAbsV,V);
5077    // Calc D
5078    XMVECTOR D = _mm_load_ps1(&g_XMASinEstConstants.f[0]);
5079    D = _mm_sub_ps(D,vAbsV);
5080    // SqrtD = sqrt(D-abs(V)) estimated
5081    XMVECTOR vConstants = _mm_rsqrt_ps(D);
5082    XMVECTOR SqrtD = _mm_mul_ps(D,vConstants);
5083    // V2 = V^2 while retaining sign
5084    XMVECTOR V2 = _mm_mul_ps(V, vAbsV);
5085    // Drop vAbsV here. D = (Const-abs(V))*abs(V)
5086    D = _mm_mul_ps(D, vAbsV);
5087
5088    XMVECTOR vResult = _mm_load_ps1(&g_XMASinEstCoefficients.f[1]);
5089    vResult = _mm_mul_ps(vResult,V);
5090    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[2]);
5091    vConstants = _mm_mul_ps(vConstants,V2);
5092    vResult = _mm_add_ps(vResult,vConstants);
5093
5094    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[3]);
5095    vConstants = _mm_mul_ps(vConstants,V2);
5096    vConstants = _mm_mul_ps(vConstants,D);
5097    vResult = _mm_add_ps(vResult,vConstants);
5098
5099    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[0]);
5100    vConstants = _mm_mul_ps(vConstants,V);
5101    vConstants = _mm_mul_ps(vConstants,SqrtD);
5102    vResult = _mm_add_ps(vResult,vConstants);
5103
5104    vConstants = _mm_load_ps1(&g_XMASinEstConstants.f[1]);
5105    vResult = _mm_sub_ps(vConstants,vResult);
5106    return vResult;
5107#else // _XM_VMX128_INTRINSICS_
5108#endif // _XM_VMX128_INTRINSICS_
5109}
5110
5111//------------------------------------------------------------------------------
5112
5113XMFINLINE XMVECTOR XMVectorATanEst
5114(
5115    FXMVECTOR V
5116)
5117{
5118#if defined(_XM_NO_INTRINSICS_)
5119
5120    XMVECTOR AbsV, V2S2, N, D;
5121    XMVECTOR S0, S1, S2;
5122    XMVECTOR HalfPi;
5123    XMVECTOR Result;
5124
5125    S0 = XMVectorSplatX(g_XMATanEstCoefficients.v);
5126    S1 = XMVectorSplatY(g_XMATanEstCoefficients.v);
5127    S2 = XMVectorSplatZ(g_XMATanEstCoefficients.v);
5128    HalfPi = XMVectorSplatW(g_XMATanEstCoefficients.v);
5129
5130    AbsV = XMVectorAbs(V);
5131
5132    V2S2 = XMVectorMultiplyAdd(V, V, S2);
5133    N = XMVectorMultiplyAdd(AbsV, HalfPi, S0);
5134    D = XMVectorMultiplyAdd(AbsV, S1, V2S2);
5135    N = XMVectorMultiply(N, V);
5136    D = XMVectorReciprocalEst(D);
5137
5138    Result = XMVectorMultiply(N, D);
5139
5140    return Result;
5141
5142#elif defined(_XM_SSE_INTRINSICS_)
5143    // Get abs(V)
5144	XMVECTOR vAbsV = _mm_setzero_ps();
5145	vAbsV = _mm_sub_ps(vAbsV,V);
5146	vAbsV = _mm_max_ps(vAbsV,V);
5147
5148    XMVECTOR vResult = _mm_load_ps1(&g_XMATanEstCoefficients.f[3]);
5149    vResult = _mm_mul_ps(vResult,vAbsV);
5150    XMVECTOR vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[0]);
5151    vResult = _mm_add_ps(vResult,vConstants);
5152    vResult = _mm_mul_ps(vResult,V);
5153
5154    XMVECTOR D = _mm_mul_ps(V,V);
5155    vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[2]);
5156    D = _mm_add_ps(D,vConstants);
5157    vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[1]);
5158    vConstants = _mm_mul_ps(vConstants,vAbsV);
5159    D = _mm_add_ps(D,vConstants);
5160    vResult = _mm_div_ps(vResult,D);
5161    return vResult;
5162#else // _XM_VMX128_INTRINSICS_
5163#endif // _XM_VMX128_INTRINSICS_
5164}
5165
5166//------------------------------------------------------------------------------
5167
5168XMFINLINE XMVECTOR XMVectorATan2Est
5169(
5170    FXMVECTOR Y,
5171    FXMVECTOR X
5172)
5173{
5174#if defined(_XM_NO_INTRINSICS_)
5175
5176    XMVECTOR Reciprocal;
5177    XMVECTOR V;
5178    XMVECTOR YSign;
5179    XMVECTOR Pi, PiOverTwo, PiOverFour, ThreePiOverFour;
5180    XMVECTOR YEqualsZero, XEqualsZero, XIsPositive, YEqualsInfinity, XEqualsInfinity;
5181    XMVECTOR ATanResultValid;
5182    XMVECTOR R0, R1, R2, R3, R4, R5;
5183    XMVECTOR Zero;
5184    XMVECTOR Result;
5185    static CONST XMVECTOR ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
5186
5187    Zero = XMVectorZero();
5188    ATanResultValid = XMVectorTrueInt();
5189
5190    Pi = XMVectorSplatX(ATan2Constants);
5191    PiOverTwo = XMVectorSplatY(ATan2Constants);
5192    PiOverFour = XMVectorSplatZ(ATan2Constants);
5193    ThreePiOverFour = XMVectorSplatW(ATan2Constants);
5194
5195    YEqualsZero = XMVectorEqual(Y, Zero);
5196    XEqualsZero = XMVectorEqual(X, Zero);
5197    XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v);
5198    XIsPositive = XMVectorEqualInt(XIsPositive, Zero);
5199    YEqualsInfinity = XMVectorIsInfinite(Y);
5200    XEqualsInfinity = XMVectorIsInfinite(X);
5201
5202    YSign = XMVectorAndInt(Y, g_XMNegativeZero.v);
5203    Pi = XMVectorOrInt(Pi, YSign);
5204    PiOverTwo = XMVectorOrInt(PiOverTwo, YSign);
5205    PiOverFour = XMVectorOrInt(PiOverFour, YSign);
5206    ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign);
5207
5208    R1 = XMVectorSelect(Pi, YSign, XIsPositive);
5209    R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero);
5210    R3 = XMVectorSelect(R2, R1, YEqualsZero);
5211    R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
5212    R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity);
5213    Result = XMVectorSelect(R3, R5, YEqualsInfinity);
5214    ATanResultValid = XMVectorEqualInt(Result, ATanResultValid);
5215
5216    Reciprocal = XMVectorReciprocalEst(X);
5217    V = XMVectorMultiply(Y, Reciprocal);
5218    R0 = XMVectorATanEst(V);
5219
5220    R1 = XMVectorSelect( Pi, Zero, XIsPositive );
5221    R2 = XMVectorAdd(R0, R1);
5222
5223    Result = XMVectorSelect(Result, R2, ATanResultValid);
5224
5225    return Result;
5226
5227#elif defined(_XM_SSE_INTRINSICS_)
5228    static CONST XMVECTORF32 ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
5229
5230    // Mask if Y>0 && Y!=INF
5231    XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y);
5232    // Get the sign of (Y&0x80000000)
5233    XMVECTOR YSign = _mm_and_ps(Y, g_XMNegativeZero);
5234    // Get the sign bits of X
5235    XMVECTOR XIsPositive = _mm_and_ps(X,g_XMNegativeZero);
5236    // Change them to masks
5237    XIsPositive = XMVectorEqualInt(XIsPositive,g_XMZero);
5238    // Get Pi
5239    XMVECTOR Pi = _mm_load_ps1(&ATan2Constants.f[0]);
5240    // Copy the sign of Y
5241    Pi = _mm_or_ps(Pi,YSign);
5242    XMVECTOR R1 = XMVectorSelect(Pi,YSign,XIsPositive);
5243    // Mask for X==0
5244    XMVECTOR vConstants = _mm_cmpeq_ps(X,g_XMZero);
5245    // Get Pi/2 with with sign of Y
5246    XMVECTOR PiOverTwo = _mm_load_ps1(&ATan2Constants.f[1]);
5247    PiOverTwo = _mm_or_ps(PiOverTwo,YSign);
5248    XMVECTOR R2 = XMVectorSelect(g_XMNegOneMask,PiOverTwo,vConstants);
5249    // Mask for Y==0
5250    vConstants = _mm_cmpeq_ps(Y,g_XMZero);
5251    R2 = XMVectorSelect(R2,R1,vConstants);
5252    // Get Pi/4 with sign of Y
5253    XMVECTOR PiOverFour = _mm_load_ps1(&ATan2Constants.f[2]);
5254    PiOverFour = _mm_or_ps(PiOverFour,YSign);
5255    // Get (Pi*3)/4 with sign of Y
5256    XMVECTOR ThreePiOverFour = _mm_load_ps1(&ATan2Constants.f[3]);
5257    ThreePiOverFour = _mm_or_ps(ThreePiOverFour,YSign);
5258    vConstants = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
5259    XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X);
5260    vConstants = XMVectorSelect(PiOverTwo,vConstants,XEqualsInfinity);
5261
5262    XMVECTOR vResult = XMVectorSelect(R2,vConstants,YEqualsInfinity);
5263    vConstants = XMVectorSelect(R1,vResult,YEqualsInfinity);
5264    // At this point, any entry that's zero will get the result
5265    // from XMVectorATan(), otherwise, return the failsafe value
5266    vResult = XMVectorSelect(vResult,vConstants,XEqualsInfinity);
5267    // Any entries not 0xFFFFFFFF, are considered precalculated
5268    XMVECTOR ATanResultValid = XMVectorEqualInt(vResult,g_XMNegOneMask);
5269    // Let's do the ATan2 function
5270    XMVECTOR Reciprocal = _mm_rcp_ps(X);
5271    vConstants = _mm_mul_ps(Y, Reciprocal);
5272    vConstants = XMVectorATanEst(vConstants);
5273    // Discard entries that have been declared void
5274
5275    XMVECTOR R3 = XMVectorSelect( Pi, g_XMZero, XIsPositive );
5276    vConstants = _mm_add_ps( vConstants, R3 );
5277
5278    vResult = XMVectorSelect(vResult,vConstants,ATanResultValid);
5279    return vResult;
5280#else // _XM_VMX128_INTRINSICS_
5281#endif // _XM_VMX128_INTRINSICS_
5282}
5283
5284//------------------------------------------------------------------------------
5285
5286XMFINLINE XMVECTOR XMVectorLerp
5287(
5288    FXMVECTOR V0,
5289    FXMVECTOR V1,
5290    FLOAT    t
5291)
5292{
5293#if defined(_XM_NO_INTRINSICS_)
5294
5295    XMVECTOR Scale;
5296    XMVECTOR Length;
5297    XMVECTOR Result;
5298
5299    // V0 + t * (V1 - V0)
5300    Scale = XMVectorReplicate(t);
5301    Length = XMVectorSubtract(V1, V0);
5302    Result = XMVectorMultiplyAdd(Length, Scale, V0);
5303
5304    return Result;
5305
5306#elif defined(_XM_SSE_INTRINSICS_)
5307	XMVECTOR L, S;
5308	XMVECTOR Result;
5309
5310	L = _mm_sub_ps( V1, V0 );
5311
5312	S = _mm_set_ps1( t );
5313
5314	Result = _mm_mul_ps( L, S );
5315
5316	return _mm_add_ps( Result, V0 );
5317#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
5318#endif // _XM_VMX128_INTRINSICS_
5319}
5320
5321//------------------------------------------------------------------------------
5322
5323XMFINLINE XMVECTOR XMVectorLerpV
5324(
5325    FXMVECTOR V0,
5326    FXMVECTOR V1,
5327    FXMVECTOR T
5328)
5329{
5330#if defined(_XM_NO_INTRINSICS_)
5331
5332    XMVECTOR Length;
5333    XMVECTOR Result;
5334
5335    // V0 + T * (V1 - V0)
5336    Length = XMVectorSubtract(V1, V0);
5337    Result = XMVectorMultiplyAdd(Length, T, V0);
5338
5339    return Result;
5340
5341#elif defined(_XM_SSE_INTRINSICS_)
5342	XMVECTOR Length;
5343	XMVECTOR Result;
5344
5345	Length = _mm_sub_ps( V1, V0 );
5346
5347	Result = _mm_mul_ps( Length, T );
5348
5349	return _mm_add_ps( Result, V0 );
5350#else // _XM_VMX128_INTRINSICS_
5351#endif // _XM_VMX128_INTRINSICS_
5352}
5353
5354//------------------------------------------------------------------------------
5355
5356XMFINLINE XMVECTOR XMVectorHermite
5357(
5358    FXMVECTOR Position0,
5359    FXMVECTOR Tangent0,
5360    FXMVECTOR Position1,
5361    CXMVECTOR Tangent1,
5362    FLOAT    t
5363)
5364{
5365#if defined(_XM_NO_INTRINSICS_)
5366
5367    XMVECTOR P0;
5368    XMVECTOR T0;
5369    XMVECTOR P1;
5370    XMVECTOR T1;
5371    XMVECTOR Result;
5372    FLOAT    t2;
5373    FLOAT    t3;
5374
5375    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
5376    //          (t^3 - 2 * t^2 + t) * Tangent0 +
5377    //          (-2 * t^3 + 3 * t^2) * Position1 +
5378    //          (t^3 - t^2) * Tangent1
5379    t2 = t * t;
5380    t3 = t * t2;
5381
5382    P0 = XMVectorReplicate(2.0f * t3 - 3.0f * t2 + 1.0f);
5383    T0 = XMVectorReplicate(t3 - 2.0f * t2 + t);
5384    P1 = XMVectorReplicate(-2.0f * t3 + 3.0f * t2);
5385    T1 = XMVectorReplicate(t3 - t2);
5386
5387    Result = XMVectorMultiply(P0, Position0);
5388    Result = XMVectorMultiplyAdd(T0, Tangent0, Result);
5389    Result = XMVectorMultiplyAdd(P1, Position1, Result);
5390    Result = XMVectorMultiplyAdd(T1, Tangent1, Result);
5391
5392    return Result;
5393
5394#elif defined(_XM_SSE_INTRINSICS_)
5395    FLOAT t2 = t * t;
5396    FLOAT t3 = t * t2;
5397
5398    XMVECTOR P0 = _mm_set_ps1(2.0f * t3 - 3.0f * t2 + 1.0f);
5399    XMVECTOR T0 = _mm_set_ps1(t3 - 2.0f * t2 + t);
5400    XMVECTOR P1 = _mm_set_ps1(-2.0f * t3 + 3.0f * t2);
5401    XMVECTOR T1 = _mm_set_ps1(t3 - t2);
5402
5403    XMVECTOR vResult = _mm_mul_ps(P0, Position0);
5404    XMVECTOR vTemp = _mm_mul_ps(T0, Tangent0);
5405    vResult = _mm_add_ps(vResult,vTemp);
5406    vTemp = _mm_mul_ps(P1, Position1);
5407    vResult = _mm_add_ps(vResult,vTemp);
5408    vTemp = _mm_mul_ps(T1, Tangent1);
5409    vResult = _mm_add_ps(vResult,vTemp);
5410    return vResult;
5411#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
5412#endif // _XM_VMX128_INTRINSICS_
5413}
5414
5415//------------------------------------------------------------------------------
5416
5417XMFINLINE XMVECTOR XMVectorHermiteV
5418(
5419    FXMVECTOR Position0,
5420    FXMVECTOR Tangent0,
5421    FXMVECTOR Position1,
5422    CXMVECTOR Tangent1,
5423    CXMVECTOR T
5424)
5425{
5426#if defined(_XM_NO_INTRINSICS_)
5427
5428    XMVECTOR P0;
5429    XMVECTOR T0;
5430    XMVECTOR P1;
5431    XMVECTOR T1;
5432    XMVECTOR Result;
5433    XMVECTOR T2;
5434    XMVECTOR T3;
5435
5436    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
5437    //          (t^3 - 2 * t^2 + t) * Tangent0 +
5438    //          (-2 * t^3 + 3 * t^2) * Position1 +
5439    //          (t^3 - t^2) * Tangent1
5440    T2 = XMVectorMultiply(T, T);
5441    T3 = XMVectorMultiply(T , T2);
5442
5443    P0 = XMVectorReplicate(2.0f * T3.vector4_f32[0] - 3.0f * T2.vector4_f32[0] + 1.0f);
5444    T0 = XMVectorReplicate(T3.vector4_f32[1] - 2.0f * T2.vector4_f32[1] + T.vector4_f32[1]);
5445    P1 = XMVectorReplicate(-2.0f * T3.vector4_f32[2] + 3.0f * T2.vector4_f32[2]);
5446    T1 = XMVectorReplicate(T3.vector4_f32[3] - T2.vector4_f32[3]);
5447
5448    Result = XMVectorMultiply(P0, Position0);
5449    Result = XMVectorMultiplyAdd(T0, Tangent0, Result);
5450    Result = XMVectorMultiplyAdd(P1, Position1, Result);
5451    Result = XMVectorMultiplyAdd(T1, Tangent1, Result);
5452
5453    return Result;
5454
5455#elif defined(_XM_SSE_INTRINSICS_)
5456    static const XMVECTORF32 CatMulT2 = {-3.0f,-2.0f,3.0f,-1.0f};
5457    static const XMVECTORF32 CatMulT3 = {2.0f,1.0f,-2.0f,1.0f};
5458
5459    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
5460    //          (t^3 - 2 * t^2 + t) * Tangent0 +
5461    //          (-2 * t^3 + 3 * t^2) * Position1 +
5462    //          (t^3 - t^2) * Tangent1
5463    XMVECTOR T2 = _mm_mul_ps(T,T);
5464    XMVECTOR T3 = _mm_mul_ps(T,T2);
5465    // Mul by the constants against t^2
5466    T2 = _mm_mul_ps(T2,CatMulT2);
5467    // Mul by the constants against t^3
5468    T3 = _mm_mul_ps(T3,CatMulT3);
5469    // T3 now has the pre-result.
5470    T3 = _mm_add_ps(T3,T2);
5471    // I need to add t.y only
5472    T2 = _mm_and_ps(T,g_XMMaskY);
5473    T3 = _mm_add_ps(T3,T2);
5474    // Add 1.0f to x
5475    T3 = _mm_add_ps(T3,g_XMIdentityR0);
5476    // Now, I have the constants created
5477    // Mul the x constant to Position0
5478    XMVECTOR vResult = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(0,0,0,0));
5479    vResult = _mm_mul_ps(vResult,Position0);
5480    // Mul the y constant to Tangent0
5481    T2 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(1,1,1,1));
5482    T2 = _mm_mul_ps(T2,Tangent0);
5483    vResult = _mm_add_ps(vResult,T2);
5484    // Mul the z constant to Position1
5485    T2 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(2,2,2,2));
5486    T2 = _mm_mul_ps(T2,Position1);
5487    vResult = _mm_add_ps(vResult,T2);
5488    // Mul the w constant to Tangent1
5489    T3 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(3,3,3,3));
5490    T3 = _mm_mul_ps(T3,Tangent1);
5491    vResult = _mm_add_ps(vResult,T3);
5492    return vResult;
5493#else // _XM_VMX128_INTRINSICS_
5494#endif // _XM_VMX128_INTRINSICS_
5495}
5496
5497//------------------------------------------------------------------------------
5498
5499XMFINLINE XMVECTOR XMVectorCatmullRom
5500(
5501    FXMVECTOR Position0,
5502    FXMVECTOR Position1,
5503    FXMVECTOR Position2,
5504    CXMVECTOR Position3,
5505    FLOAT    t
5506)
5507{
5508#if defined(_XM_NO_INTRINSICS_)
5509
5510    XMVECTOR P0;
5511    XMVECTOR P1;
5512    XMVECTOR P2;
5513    XMVECTOR P3;
5514    XMVECTOR Result;
5515    FLOAT    t2;
5516    FLOAT    t3;
5517
5518    // Result = ((-t^3 + 2 * t^2 - t) * Position0 +
5519    //           (3 * t^3 - 5 * t^2 + 2) * Position1 +
5520    //           (-3 * t^3 + 4 * t^2 + t) * Position2 +
5521    //           (t^3 - t^2) * Position3) * 0.5
5522    t2 = t * t;
5523    t3 = t * t2;
5524
5525    P0 = XMVectorReplicate((-t3 + 2.0f * t2 - t) * 0.5f);
5526    P1 = XMVectorReplicate((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f);
5527    P2 = XMVectorReplicate((-3.0f * t3 + 4.0f * t2 + t) * 0.5f);
5528    P3 = XMVectorReplicate((t3 - t2) * 0.5f);
5529
5530    Result = XMVectorMultiply(P0, Position0);
5531    Result = XMVectorMultiplyAdd(P1, Position1, Result);
5532    Result = XMVectorMultiplyAdd(P2, Position2, Result);
5533    Result = XMVectorMultiplyAdd(P3, Position3, Result);
5534
5535    return Result;
5536
5537#elif defined(_XM_SSE_INTRINSICS_)
5538    FLOAT t2 = t * t;
5539    FLOAT t3 = t * t2;
5540
5541    XMVECTOR P0 = _mm_set_ps1((-t3 + 2.0f * t2 - t) * 0.5f);
5542    XMVECTOR P1 = _mm_set_ps1((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f);
5543    XMVECTOR P2 = _mm_set_ps1((-3.0f * t3 + 4.0f * t2 + t) * 0.5f);
5544    XMVECTOR P3 = _mm_set_ps1((t3 - t2) * 0.5f);
5545
5546    P0 = _mm_mul_ps(P0, Position0);
5547    P1 = _mm_mul_ps(P1, Position1);
5548    P2 = _mm_mul_ps(P2, Position2);
5549    P3 = _mm_mul_ps(P3, Position3);
5550    P0 = _mm_add_ps(P0,P1);
5551    P2 = _mm_add_ps(P2,P3);
5552    P0 = _mm_add_ps(P0,P2);
5553    return P0;
5554#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
5555#endif // _XM_VMX128_INTRINSICS_
5556}
5557
5558//------------------------------------------------------------------------------
5559
5560XMFINLINE XMVECTOR XMVectorCatmullRomV
5561(
5562    FXMVECTOR Position0,
5563    FXMVECTOR Position1,
5564    FXMVECTOR Position2,
5565    CXMVECTOR Position3,
5566    CXMVECTOR T
5567)
5568{
5569#if defined(_XM_NO_INTRINSICS_)
5570    float fx = T.vector4_f32[0];
5571    float fy = T.vector4_f32[1];
5572    float fz = T.vector4_f32[2];
5573    float fw = T.vector4_f32[3];
5574    XMVECTOR vResult = {
5575        0.5f*((-fx*fx*fx+2*fx*fx-fx)*Position0.vector4_f32[0]+
5576        (3*fx*fx*fx-5*fx*fx+2)*Position1.vector4_f32[0]+
5577        (-3*fx*fx*fx+4*fx*fx+fx)*Position2.vector4_f32[0]+
5578        (fx*fx*fx-fx*fx)*Position3.vector4_f32[0]),
5579        0.5f*((-fy*fy*fy+2*fy*fy-fy)*Position0.vector4_f32[1]+
5580        (3*fy*fy*fy-5*fy*fy+2)*Position1.vector4_f32[1]+
5581        (-3*fy*fy*fy+4*fy*fy+fy)*Position2.vector4_f32[1]+
5582        (fy*fy*fy-fy*fy)*Position3.vector4_f32[1]),
5583        0.5f*((-fz*fz*fz+2*fz*fz-fz)*Position0.vector4_f32[2]+
5584        (3*fz*fz*fz-5*fz*fz+2)*Position1.vector4_f32[2]+
5585        (-3*fz*fz*fz+4*fz*fz+fz)*Position2.vector4_f32[2]+
5586        (fz*fz*fz-fz*fz)*Position3.vector4_f32[2]),
5587        0.5f*((-fw*fw*fw+2*fw*fw-fw)*Position0.vector4_f32[3]+
5588        (3*fw*fw*fw-5*fw*fw+2)*Position1.vector4_f32[3]+
5589        (-3*fw*fw*fw+4*fw*fw+fw)*Position2.vector4_f32[3]+
5590        (fw*fw*fw-fw*fw)*Position3.vector4_f32[3])
5591    };
5592    return vResult;
5593#elif defined(_XM_SSE_INTRINSICS_)
5594    static const XMVECTORF32 Catmul2 = {2.0f,2.0f,2.0f,2.0f};
5595    static const XMVECTORF32 Catmul3 = {3.0f,3.0f,3.0f,3.0f};
5596    static const XMVECTORF32 Catmul4 = {4.0f,4.0f,4.0f,4.0f};
5597    static const XMVECTORF32 Catmul5 = {5.0f,5.0f,5.0f,5.0f};
5598    // Cache T^2 and T^3
5599    XMVECTOR T2 = _mm_mul_ps(T,T);
5600    XMVECTOR T3 = _mm_mul_ps(T,T2);
5601    // Perform the Position0 term
5602    XMVECTOR vResult = _mm_add_ps(T2,T2);
5603    vResult = _mm_sub_ps(vResult,T);
5604    vResult = _mm_sub_ps(vResult,T3);
5605    vResult = _mm_mul_ps(vResult,Position0);
5606    // Perform the Position1 term and add
5607    XMVECTOR vTemp = _mm_mul_ps(T3,Catmul3);
5608    XMVECTOR vTemp2 = _mm_mul_ps(T2,Catmul5);
5609    vTemp = _mm_sub_ps(vTemp,vTemp2);
5610    vTemp = _mm_add_ps(vTemp,Catmul2);
5611    vTemp = _mm_mul_ps(vTemp,Position1);
5612    vResult = _mm_add_ps(vResult,vTemp);
5613    // Perform the Position2 term and add
5614    vTemp = _mm_mul_ps(T2,Catmul4);
5615    vTemp2 = _mm_mul_ps(T3,Catmul3);
5616    vTemp = _mm_sub_ps(vTemp,vTemp2);
5617    vTemp = _mm_add_ps(vTemp,T);
5618    vTemp = _mm_mul_ps(vTemp,Position2);
5619    vResult = _mm_add_ps(vResult,vTemp);
5620    // Position3 is the last term
5621    T3 = _mm_sub_ps(T3,T2);
5622    T3 = _mm_mul_ps(T3,Position3);
5623    vResult = _mm_add_ps(vResult,T3);
5624    // Multiply by 0.5f and exit
5625    vResult = _mm_mul_ps(vResult,g_XMOneHalf);
5626    return vResult;
5627#else // _XM_VMX128_INTRINSICS_
5628#endif // _XM_VMX128_INTRINSICS_
5629}
5630
5631//------------------------------------------------------------------------------
5632
5633XMFINLINE XMVECTOR XMVectorBaryCentric
5634(
5635    FXMVECTOR Position0,
5636    FXMVECTOR Position1,
5637    FXMVECTOR Position2,
5638    FLOAT    f,
5639    FLOAT    g
5640)
5641{
5642#if defined(_XM_NO_INTRINSICS_)
5643
5644    // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0)
5645    XMVECTOR P10;
5646    XMVECTOR P20;
5647    XMVECTOR ScaleF;
5648    XMVECTOR ScaleG;
5649    XMVECTOR Result;
5650
5651    P10 = XMVectorSubtract(Position1, Position0);
5652    ScaleF = XMVectorReplicate(f);
5653
5654    P20 = XMVectorSubtract(Position2, Position0);
5655    ScaleG = XMVectorReplicate(g);
5656
5657    Result = XMVectorMultiplyAdd(P10, ScaleF, Position0);
5658    Result = XMVectorMultiplyAdd(P20, ScaleG, Result);
5659
5660    return Result;
5661
5662#elif defined(_XM_SSE_INTRINSICS_)
5663	XMVECTOR R1 = _mm_sub_ps(Position1,Position0);
5664	XMVECTOR SF = _mm_set_ps1(f);
5665	XMVECTOR R2 = _mm_sub_ps(Position2,Position0);
5666	XMVECTOR SG = _mm_set_ps1(g);
5667	R1 = _mm_mul_ps(R1,SF);
5668	R2 = _mm_mul_ps(R2,SG);
5669	R1 = _mm_add_ps(R1,Position0);
5670	R1 = _mm_add_ps(R1,R2);
5671    return R1;
5672#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
5673#endif // _XM_VMX128_INTRINSICS_
5674}
5675
5676//------------------------------------------------------------------------------
5677
5678XMFINLINE XMVECTOR XMVectorBaryCentricV
5679(
5680    FXMVECTOR Position0,
5681    FXMVECTOR Position1,
5682    FXMVECTOR Position2,
5683    CXMVECTOR F,
5684    CXMVECTOR G
5685)
5686{
5687#if defined(_XM_NO_INTRINSICS_)
5688
5689    // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0)
5690    XMVECTOR P10;
5691    XMVECTOR P20;
5692    XMVECTOR Result;
5693
5694    P10 = XMVectorSubtract(Position1, Position0);
5695    P20 = XMVectorSubtract(Position2, Position0);
5696
5697    Result = XMVectorMultiplyAdd(P10, F, Position0);
5698    Result = XMVectorMultiplyAdd(P20, G, Result);
5699
5700    return Result;
5701
5702#elif defined(_XM_SSE_INTRINSICS_)
5703	XMVECTOR R1 = _mm_sub_ps(Position1,Position0);
5704	XMVECTOR R2 = _mm_sub_ps(Position2,Position0);
5705	R1 = _mm_mul_ps(R1,F);
5706	R2 = _mm_mul_ps(R2,G);
5707	R1 = _mm_add_ps(R1,Position0);
5708	R1 = _mm_add_ps(R1,R2);
5709    return R1;
5710#else // _XM_VMX128_INTRINSICS_
5711#endif // _XM_VMX128_INTRINSICS_
5712}
5713
5714/****************************************************************************
5715 *
5716 * 2D Vector
5717 *
5718 ****************************************************************************/
5719
5720//------------------------------------------------------------------------------
5721// Comparison operations
5722//------------------------------------------------------------------------------
5723
5724//------------------------------------------------------------------------------
5725
5726XMFINLINE BOOL XMVector2Equal
5727(
5728    FXMVECTOR V1,
5729    FXMVECTOR V2
5730)
5731{
5732#if defined(_XM_NO_INTRINSICS_)
5733    return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1])) != 0);
5734#elif defined(_XM_SSE_INTRINSICS_)
5735    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
5736// z and w are don't care
5737    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
5738#else // _XM_VMX128_INTRINSICS_
5739    return XMComparisonAllTrue(XMVector2EqualR(V1, V2));
5740#endif
5741}
5742
5743//------------------------------------------------------------------------------
5744
5745XMFINLINE UINT XMVector2EqualR
5746(
5747    FXMVECTOR V1,
5748    FXMVECTOR V2
5749)
5750{
5751#if defined(_XM_NO_INTRINSICS_)
5752
5753    UINT CR = 0;
5754
5755    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
5756        (V1.vector4_f32[1] == V2.vector4_f32[1]))
5757    {
5758        CR = XM_CRMASK_CR6TRUE;
5759    }
5760    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
5761        (V1.vector4_f32[1] != V2.vector4_f32[1]))
5762    {
5763        CR = XM_CRMASK_CR6FALSE;
5764    }
5765    return CR;
5766#elif defined(_XM_SSE_INTRINSICS_)
5767    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
5768// z and w are don't care
5769    int iTest = _mm_movemask_ps(vTemp)&3;
5770    UINT CR = 0;
5771    if (iTest==3)
5772    {
5773        CR = XM_CRMASK_CR6TRUE;
5774    }
5775    else if (!iTest)
5776    {
5777        CR = XM_CRMASK_CR6FALSE;
5778    }
5779    return CR;
5780#else // _XM_VMX128_INTRINSICS_
5781#endif // _XM_VMX128_INTRINSICS_
5782}
5783
5784//------------------------------------------------------------------------------
5785
5786XMFINLINE BOOL XMVector2EqualInt
5787(
5788    FXMVECTOR V1,
5789    FXMVECTOR V2
5790)
5791{
5792#if defined(_XM_NO_INTRINSICS_)
5793    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1])) != 0);
5794#elif defined(_XM_SSE_INTRINSICS_)
5795    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
5796    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3)==3) != 0);
5797#else // _XM_VMX128_INTRINSICS_
5798    return XMComparisonAllTrue(XMVector2EqualIntR(V1, V2));
5799#endif
5800}
5801
5802//------------------------------------------------------------------------------
5803
5804XMFINLINE UINT XMVector2EqualIntR
5805(
5806    FXMVECTOR V1,
5807    FXMVECTOR V2
5808)
5809{
5810#if defined(_XM_NO_INTRINSICS_)
5811
5812    UINT CR = 0;
5813    if ((V1.vector4_u32[0] == V2.vector4_u32[0]) &&
5814        (V1.vector4_u32[1] == V2.vector4_u32[1]))
5815    {
5816        CR = XM_CRMASK_CR6TRUE;
5817    }
5818    else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) &&
5819        (V1.vector4_u32[1] != V2.vector4_u32[1]))
5820    {
5821        CR = XM_CRMASK_CR6FALSE;
5822    }
5823    return CR;
5824
5825#elif defined(_XM_SSE_INTRINSICS_)
5826    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
5827    int iTest = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3;
5828    UINT CR = 0;
5829    if (iTest==3)
5830    {
5831        CR = XM_CRMASK_CR6TRUE;
5832    }
5833    else if (!iTest)
5834    {
5835        CR = XM_CRMASK_CR6FALSE;
5836    }
5837	return CR;
5838#else // _XM_VMX128_INTRINSICS_
5839#endif // _XM_VMX128_INTRINSICS_
5840}
5841
5842//------------------------------------------------------------------------------
5843
5844XMFINLINE BOOL XMVector2NearEqual
5845(
5846    FXMVECTOR V1,
5847    FXMVECTOR V2,
5848    FXMVECTOR Epsilon
5849)
5850{
5851#if defined(_XM_NO_INTRINSICS_)
5852    FLOAT dx, dy;
5853    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
5854    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
5855    return ((dx <= Epsilon.vector4_f32[0]) &&
5856            (dy <= Epsilon.vector4_f32[1]));
5857#elif defined(_XM_SSE_INTRINSICS_)
5858    // Get the difference
5859    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
5860    // Get the absolute value of the difference
5861    XMVECTOR vTemp = _mm_setzero_ps();
5862    vTemp = _mm_sub_ps(vTemp,vDelta);
5863    vTemp = _mm_max_ps(vTemp,vDelta);
5864    vTemp = _mm_cmple_ps(vTemp,Epsilon);
5865    // z and w are don't care
5866    return (((_mm_movemask_ps(vTemp)&3)==0x3) != 0);
5867#else // _XM_VMX128_INTRINSICS_
5868#endif // _XM_VMX128_INTRINSICS_
5869}
5870
5871//------------------------------------------------------------------------------
5872
5873XMFINLINE BOOL XMVector2NotEqual
5874(
5875    FXMVECTOR V1,
5876    FXMVECTOR V2
5877)
5878{
5879#if defined(_XM_NO_INTRINSICS_)
5880    return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1])) != 0);
5881#elif defined(_XM_SSE_INTRINSICS_)
5882    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
5883// z and w are don't care
5884    return (((_mm_movemask_ps(vTemp)&3)!=3) != 0);
5885#else // _XM_VMX128_INTRINSICS_
5886    return XMComparisonAnyFalse(XMVector2EqualR(V1, V2));
5887#endif
5888}
5889
5890//------------------------------------------------------------------------------
5891
5892XMFINLINE BOOL XMVector2NotEqualInt
5893(
5894    FXMVECTOR V1,
5895    FXMVECTOR V2
5896)
5897{
5898#if defined(_XM_NO_INTRINSICS_)
5899    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1])) != 0);
5900#elif defined(_XM_SSE_INTRINSICS_)
5901    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
5902    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3)!=3) != 0);
5903#else // _XM_VMX128_INTRINSICS_
5904    return XMComparisonAnyFalse(XMVector2EqualIntR(V1, V2));
5905#endif
5906}
5907
5908//------------------------------------------------------------------------------
5909
5910XMFINLINE BOOL XMVector2Greater
5911(
5912    FXMVECTOR V1,
5913    FXMVECTOR V2
5914)
5915{
5916#if defined(_XM_NO_INTRINSICS_)
5917    return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1])) != 0);
5918
5919#elif defined(_XM_SSE_INTRINSICS_)
5920    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
5921// z and w are don't care
5922    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
5923#else // _XM_VMX128_INTRINSICS_
5924    return XMComparisonAllTrue(XMVector2GreaterR(V1, V2));
5925#endif
5926}
5927
5928//------------------------------------------------------------------------------
5929
5930XMFINLINE UINT XMVector2GreaterR
5931(
5932    FXMVECTOR V1,
5933    FXMVECTOR V2
5934)
5935{
5936#if defined(_XM_NO_INTRINSICS_)
5937
5938    UINT CR = 0;
5939    if ((V1.vector4_f32[0] > V2.vector4_f32[0]) &&
5940        (V1.vector4_f32[1] > V2.vector4_f32[1]))
5941    {
5942        CR = XM_CRMASK_CR6TRUE;
5943    }
5944    else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) &&
5945        (V1.vector4_f32[1] <= V2.vector4_f32[1]))
5946    {
5947        CR = XM_CRMASK_CR6FALSE;
5948    }
5949    return CR;
5950#elif defined(_XM_SSE_INTRINSICS_)
5951    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
5952    int iTest = _mm_movemask_ps(vTemp)&3;
5953    UINT CR = 0;
5954    if (iTest==3)
5955    {
5956        CR = XM_CRMASK_CR6TRUE;
5957    }
5958    else if (!iTest)
5959    {
5960        CR = XM_CRMASK_CR6FALSE;
5961    }
5962    return CR;
5963#else // _XM_VMX128_INTRINSICS_
5964#endif // _XM_VMX128_INTRINSICS_
5965}
5966
5967//------------------------------------------------------------------------------
5968
5969XMFINLINE BOOL XMVector2GreaterOrEqual
5970(
5971    FXMVECTOR V1,
5972    FXMVECTOR V2
5973)
5974{
5975#if defined(_XM_NO_INTRINSICS_)
5976    return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1])) != 0);
5977#elif defined(_XM_SSE_INTRINSICS_)
5978    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
5979    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
5980#else // _XM_VMX128_INTRINSICS_
5981    return XMComparisonAllTrue(XMVector2GreaterOrEqualR(V1, V2));
5982#endif
5983}
5984
5985//------------------------------------------------------------------------------
5986
5987XMFINLINE UINT XMVector2GreaterOrEqualR
5988(
5989    FXMVECTOR V1,
5990    FXMVECTOR V2
5991)
5992{
5993#if defined(_XM_NO_INTRINSICS_)
5994    UINT CR = 0;
5995    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
5996        (V1.vector4_f32[1] >= V2.vector4_f32[1]))
5997    {
5998        CR = XM_CRMASK_CR6TRUE;
5999    }
6000    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
6001        (V1.vector4_f32[1] < V2.vector4_f32[1]))
6002    {
6003        CR = XM_CRMASK_CR6FALSE;
6004    }
6005    return CR;
6006
6007#elif defined(_XM_SSE_INTRINSICS_)
6008    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
6009    int iTest = _mm_movemask_ps(vTemp)&3;
6010    UINT CR = 0;
6011    if (iTest == 3)
6012    {
6013        CR = XM_CRMASK_CR6TRUE;
6014    }
6015    else if (!iTest)
6016    {
6017        CR = XM_CRMASK_CR6FALSE;
6018    }
6019    return CR;
6020#else // _XM_VMX128_INTRINSICS_
6021#endif // _XM_VMX128_INTRINSICS_
6022}
6023
6024//------------------------------------------------------------------------------
6025
6026XMFINLINE BOOL XMVector2Less
6027(
6028    FXMVECTOR V1,
6029    FXMVECTOR V2
6030)
6031{
6032#if defined(_XM_NO_INTRINSICS_)
6033    return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1])) != 0);
6034#elif defined(_XM_SSE_INTRINSICS_)
6035    XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
6036    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
6037#else // _XM_VMX128_INTRINSICS_
6038    return XMComparisonAllTrue(XMVector2GreaterR(V2, V1));
6039#endif
6040}
6041
6042//------------------------------------------------------------------------------
6043
6044XMFINLINE BOOL XMVector2LessOrEqual
6045(
6046    FXMVECTOR V1,
6047    FXMVECTOR V2
6048)
6049{
6050#if defined(_XM_NO_INTRINSICS_)
6051    return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1])) != 0);
6052#elif defined(_XM_SSE_INTRINSICS_)
6053    XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
6054    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
6055#else // _XM_VMX128_INTRINSICS_
6056    return XMComparisonAllTrue(XMVector2GreaterOrEqualR(V2, V1));
6057#endif
6058}
6059
6060//------------------------------------------------------------------------------
6061
6062XMFINLINE BOOL XMVector2InBounds
6063(
6064    FXMVECTOR V,
6065    FXMVECTOR Bounds
6066)
6067{
6068 #if defined(_XM_NO_INTRINSICS_)
6069    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
6070        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1])) != 0);
6071 #elif defined(_XM_SSE_INTRINSICS_)
6072    // Test if less than or equal
6073    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
6074    // Negate the bounds
6075    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
6076    // Test if greater or equal (Reversed)
6077    vTemp2 = _mm_cmple_ps(vTemp2,V);
6078    // Blend answers
6079    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
6080    // x and y in bounds? (z and w are don't care)
6081    return (((_mm_movemask_ps(vTemp1)&0x3)==0x3) != 0);
6082#else // _XM_VMX128_INTRINSICS_
6083    return XMComparisonAllInBounds(XMVector2InBoundsR(V, Bounds));
6084#endif
6085}
6086
6087//------------------------------------------------------------------------------
6088
6089XMFINLINE UINT XMVector2InBoundsR
6090(
6091    FXMVECTOR V,
6092    FXMVECTOR Bounds
6093)
6094{
6095#if defined(_XM_NO_INTRINSICS_)
6096    UINT CR = 0;
6097    if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
6098        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]))
6099    {
6100        CR = XM_CRMASK_CR6BOUNDS;
6101    }
6102    return CR;
6103
6104#elif defined(_XM_SSE_INTRINSICS_)
6105    // Test if less than or equal
6106    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
6107    // Negate the bounds
6108    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
6109    // Test if greater or equal (Reversed)
6110    vTemp2 = _mm_cmple_ps(vTemp2,V);
6111    // Blend answers
6112    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
6113    // x and y in bounds? (z and w are don't care)
6114    return ((_mm_movemask_ps(vTemp1)&0x3)==0x3) ? XM_CRMASK_CR6BOUNDS : 0;
6115#else // _XM_VMX128_INTRINSICS_
6116#endif // _XM_VMX128_INTRINSICS_
6117}
6118
6119//------------------------------------------------------------------------------
6120
6121XMFINLINE BOOL XMVector2IsNaN
6122(
6123    FXMVECTOR V
6124)
6125{
6126#if defined(_XM_NO_INTRINSICS_)
6127    return (XMISNAN(V.vector4_f32[0]) ||
6128            XMISNAN(V.vector4_f32[1]));
6129#elif defined(_XM_SSE_INTRINSICS_)
6130    // Mask off the exponent
6131    __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
6132    // Mask off the mantissa
6133    __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
6134    // Are any of the exponents == 0x7F800000?
6135    vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
6136    // Are any of the mantissa's zero? (SSE2 doesn't have a neq test)
6137    vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
6138    // Perform a not on the NaN test to be true on NON-zero mantissas
6139    vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
6140    // If x or y are NaN, the signs are true after the merge above
6141    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTempNan)[0])&3) != 0);
6142#else // _XM_VMX128_INTRINSICS_
6143#endif // _XM_VMX128_INTRINSICS_
6144}
6145
6146//------------------------------------------------------------------------------
6147
6148XMFINLINE BOOL XMVector2IsInfinite
6149(
6150    FXMVECTOR V
6151)
6152{
6153#if defined(_XM_NO_INTRINSICS_)
6154
6155    return (XMISINF(V.vector4_f32[0]) ||
6156            XMISINF(V.vector4_f32[1]));
6157#elif defined(_XM_SSE_INTRINSICS_)
6158    // Mask off the sign bit
6159    __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
6160    // Compare to infinity
6161    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
6162    // If x or z are infinity, the signs are true.
6163    return ((_mm_movemask_ps(vTemp)&3) != 0);
6164#else // _XM_VMX128_INTRINSICS_
6165#endif // _XM_VMX128_INTRINSICS_
6166}
6167
6168//------------------------------------------------------------------------------
6169// Computation operations
6170//------------------------------------------------------------------------------
6171
6172//------------------------------------------------------------------------------
6173
6174XMFINLINE XMVECTOR XMVector2Dot
6175(
6176    FXMVECTOR V1,
6177    FXMVECTOR V2
6178)
6179{
6180#if defined(_XM_NO_INTRINSICS_)
6181
6182    XMVECTOR Result;
6183
6184    Result.vector4_f32[0] =
6185    Result.vector4_f32[1] =
6186    Result.vector4_f32[2] =
6187    Result.vector4_f32[3] = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1];
6188
6189    return Result;
6190
6191#elif defined(_XM_SSE_INTRINSICS_)
6192    // Perform the dot product on x and y
6193    XMVECTOR vLengthSq = _mm_mul_ps(V1,V2);
6194    // vTemp has y splatted
6195    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
6196    // x+y
6197    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
6198    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
6199    return vLengthSq;
6200#else // _XM_VMX128_INTRINSICS_
6201#endif // _XM_VMX128_INTRINSICS_
6202}
6203
6204//------------------------------------------------------------------------------
6205
6206XMFINLINE XMVECTOR XMVector2Cross
6207(
6208    FXMVECTOR V1,
6209    FXMVECTOR V2
6210)
6211{
6212#if defined(_XM_NO_INTRINSICS_)
6213    FLOAT fCross = (V1.vector4_f32[0] * V2.vector4_f32[1]) - (V1.vector4_f32[1] * V2.vector4_f32[0]);
6214    XMVECTOR vResult = {
6215        fCross,
6216        fCross,
6217        fCross,
6218        fCross
6219    };
6220    return vResult;
6221#elif defined(_XM_SSE_INTRINSICS_)
6222    // Swap x and y
6223    XMVECTOR vResult = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(0,1,0,1));
6224    // Perform the muls
6225    vResult = _mm_mul_ps(vResult,V1);
6226    // Splat y
6227    XMVECTOR vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(1,1,1,1));
6228    // Sub the values
6229    vResult = _mm_sub_ss(vResult,vTemp);
6230    // Splat the cross product
6231    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,0,0,0));
6232	return vResult;
6233#else // _XM_VMX128_INTRINSICS_
6234#endif // _XM_VMX128_INTRINSICS_
6235}
6236
6237//------------------------------------------------------------------------------
6238
6239XMFINLINE XMVECTOR XMVector2LengthSq
6240(
6241    FXMVECTOR V
6242)
6243{
6244#if defined(_XM_NO_INTRINSICS_)
6245    return XMVector2Dot(V, V);
6246#elif defined(_XM_SSE_INTRINSICS_)
6247    // Perform the dot product on x and y
6248    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
6249    // vTemp has y splatted
6250    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
6251    // x+y
6252    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
6253    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
6254    return vLengthSq;
6255#else
6256    return XMVector2Dot(V, V);
6257#endif
6258}
6259
6260//------------------------------------------------------------------------------
6261
6262XMFINLINE XMVECTOR XMVector2ReciprocalLengthEst
6263(
6264    FXMVECTOR V
6265)
6266{
6267#if defined(_XM_NO_INTRINSICS_)
6268
6269    XMVECTOR Result;
6270
6271    Result = XMVector2LengthSq(V);
6272    Result = XMVectorReciprocalSqrtEst(Result);
6273
6274    return Result;
6275
6276#elif defined(_XM_SSE_INTRINSICS_)
6277    // Perform the dot product on x and y
6278    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
6279    // vTemp has y splatted
6280    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
6281    // x+y
6282    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
6283    vLengthSq = _mm_rsqrt_ss(vLengthSq);
6284    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
6285    return vLengthSq;
6286#else // _XM_VMX128_INTRINSICS_
6287#endif // _XM_VMX128_INTRINSICS_
6288}
6289
6290//------------------------------------------------------------------------------
6291
6292XMFINLINE XMVECTOR XMVector2ReciprocalLength
6293(
6294    FXMVECTOR V
6295)
6296{
6297#if defined(_XM_NO_INTRINSICS_)
6298
6299    XMVECTOR Result;
6300
6301    Result = XMVector2LengthSq(V);
6302    Result = XMVectorReciprocalSqrt(Result);
6303
6304    return Result;
6305
6306#elif defined(_XM_SSE_INTRINSICS_)
6307    // Perform the dot product on x and y
6308    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
6309    // vTemp has y splatted
6310    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
6311    // x+y
6312    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
6313    vLengthSq = _mm_sqrt_ss(vLengthSq);
6314    vLengthSq = _mm_div_ss(g_XMOne,vLengthSq);
6315    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
6316    return vLengthSq;
6317#else // _XM_VMX128_INTRINSICS_
6318#endif // _XM_VMX128_INTRINSICS_
6319}
6320
6321//------------------------------------------------------------------------------
6322
6323XMFINLINE XMVECTOR XMVector2LengthEst
6324(
6325    FXMVECTOR V
6326)
6327{
6328#if defined(_XM_NO_INTRINSICS_)
6329    XMVECTOR Result;
6330    Result = XMVector2LengthSq(V);
6331    Result = XMVectorSqrtEst(Result);
6332    return Result;
6333#elif defined(_XM_SSE_INTRINSICS_)
6334    // Perform the dot product on x and y
6335    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
6336    // vTemp has y splatted
6337    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
6338    // x+y
6339    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
6340    vLengthSq = _mm_sqrt_ss(vLengthSq);
6341    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
6342    return vLengthSq;
6343#else // _XM_VMX128_INTRINSICS_
6344#endif // _XM_VMX128_INTRINSICS_
6345}
6346
6347//------------------------------------------------------------------------------
6348
6349XMFINLINE XMVECTOR XMVector2Length
6350(
6351    FXMVECTOR V
6352)
6353{
6354#if defined(_XM_NO_INTRINSICS_)
6355
6356    XMVECTOR Result;
6357    Result = XMVector2LengthSq(V);
6358    Result = XMVectorSqrt(Result);
6359    return Result;
6360
6361#elif defined(_XM_SSE_INTRINSICS_)
6362    // Perform the dot product on x and y
6363    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
6364    // vTemp has y splatted
6365    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
6366    // x+y
6367    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
6368    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
6369    vLengthSq = _mm_sqrt_ps(vLengthSq);
6370    return vLengthSq;
6371#else // _XM_VMX128_INTRINSICS_
6372#endif // _XM_VMX128_INTRINSICS_
6373}
6374
6375//------------------------------------------------------------------------------
6376// XMVector2NormalizeEst uses a reciprocal estimate and
6377// returns QNaN on zero and infinite vectors.
6378
6379XMFINLINE XMVECTOR XMVector2NormalizeEst
6380(
6381    FXMVECTOR V
6382)
6383{
6384#if defined(_XM_NO_INTRINSICS_)
6385
6386    XMVECTOR Result;
6387    Result = XMVector2ReciprocalLength(V);
6388    Result = XMVectorMultiply(V, Result);
6389    return Result;
6390
6391#elif defined(_XM_SSE_INTRINSICS_)
6392    // Perform the dot product on x and y
6393    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
6394    // vTemp has y splatted
6395    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
6396    // x+y
6397    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
6398    vLengthSq = _mm_rsqrt_ss(vLengthSq);
6399    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
6400    vLengthSq = _mm_mul_ps(vLengthSq,V);
6401	return vLengthSq;
6402#else // _XM_VMX128_INTRINSICS_
6403#endif // _XM_VMX128_INTRINSICS_
6404}
6405
6406//------------------------------------------------------------------------------
6407
6408XMFINLINE XMVECTOR XMVector2Normalize
6409(
6410    FXMVECTOR V
6411)
6412{
6413#if defined(_XM_NO_INTRINSICS_)
6414    FLOAT fLength;
6415    XMVECTOR vResult;
6416
6417    vResult = XMVector2Length( V );
6418    fLength = vResult.vector4_f32[0];
6419
6420    // Prevent divide by zero
6421    if (fLength > 0) {
6422        fLength = 1.0f/fLength;
6423    }
6424
6425    vResult.vector4_f32[0] = V.vector4_f32[0]*fLength;
6426    vResult.vector4_f32[1] = V.vector4_f32[1]*fLength;
6427    vResult.vector4_f32[2] = V.vector4_f32[2]*fLength;
6428    vResult.vector4_f32[3] = V.vector4_f32[3]*fLength;
6429    return vResult;
6430
6431#elif defined(_XM_SSE_INTRINSICS_)
6432    // Perform the dot product on x and y only
6433    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
6434    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
6435    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
6436	vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
6437    // Prepare for the division
6438    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
6439    // Create zero with a single instruction
6440    XMVECTOR vZeroMask = _mm_setzero_ps();
6441    // Test for a divide by zero (Must be FP to detect -0.0)
6442    vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
6443    // Failsafe on zero (Or epsilon) length planes
6444    // If the length is infinity, set the elements to zero
6445    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
6446    // Reciprocal mul to perform the normalization
6447    vResult = _mm_div_ps(V,vResult);
6448    // Any that are infinity, set to zero
6449    vResult = _mm_and_ps(vResult,vZeroMask);
6450    // Select qnan or result based on infinite length
6451	XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
6452    XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
6453    vResult = _mm_or_ps(vTemp1,vTemp2);
6454    return vResult;
6455#else // _XM_VMX128_INTRINSICS_
6456#endif // _XM_VMX128_INTRINSICS_
6457}
6458
6459//------------------------------------------------------------------------------
6460
6461XMFINLINE XMVECTOR XMVector2ClampLength
6462(
6463    FXMVECTOR V,
6464    FLOAT    LengthMin,
6465    FLOAT    LengthMax
6466)
6467{
6468#if defined(_XM_NO_INTRINSICS_)
6469
6470    XMVECTOR ClampMax;
6471    XMVECTOR ClampMin;
6472
6473    ClampMax = XMVectorReplicate(LengthMax);
6474    ClampMin = XMVectorReplicate(LengthMin);
6475
6476    return XMVector2ClampLengthV(V, ClampMin, ClampMax);
6477
6478#elif defined(_XM_SSE_INTRINSICS_)
6479    XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
6480    XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
6481    return XMVector2ClampLengthV(V, ClampMin, ClampMax);
6482#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
6483#endif // _XM_VMX128_INTRINSICS_
6484}
6485
6486//------------------------------------------------------------------------------
6487
6488XMFINLINE XMVECTOR XMVector2ClampLengthV
6489(
6490    FXMVECTOR V,
6491    FXMVECTOR LengthMin,
6492    FXMVECTOR LengthMax
6493)
6494{
6495#if defined(_XM_NO_INTRINSICS_)
6496
6497    XMVECTOR ClampLength;
6498    XMVECTOR LengthSq;
6499    XMVECTOR RcpLength;
6500    XMVECTOR Length;
6501    XMVECTOR Normal;
6502    XMVECTOR Zero;
6503    XMVECTOR InfiniteLength;
6504    XMVECTOR ZeroLength;
6505    XMVECTOR Select;
6506    XMVECTOR ControlMax;
6507    XMVECTOR ControlMin;
6508    XMVECTOR Control;
6509    XMVECTOR Result;
6510
6511    XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]));
6512    XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]));
6513    XMASSERT(XMVector2GreaterOrEqual(LengthMin, XMVectorZero()));
6514    XMASSERT(XMVector2GreaterOrEqual(LengthMax, XMVectorZero()));
6515    XMASSERT(XMVector2GreaterOrEqual(LengthMax, LengthMin));
6516
6517    LengthSq = XMVector2LengthSq(V);
6518
6519    Zero = XMVectorZero();
6520
6521    RcpLength = XMVectorReciprocalSqrt(LengthSq);
6522
6523    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
6524    ZeroLength = XMVectorEqual(LengthSq, Zero);
6525
6526    Length = XMVectorMultiply(LengthSq, RcpLength);
6527
6528    Normal = XMVectorMultiply(V, RcpLength);
6529
6530    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
6531    Length = XMVectorSelect(LengthSq, Length, Select);
6532    Normal = XMVectorSelect(LengthSq, Normal, Select);
6533
6534    ControlMax = XMVectorGreater(Length, LengthMax);
6535    ControlMin = XMVectorLess(Length, LengthMin);
6536
6537    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
6538    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
6539
6540    Result = XMVectorMultiply(Normal, ClampLength);
6541
6542    // Preserve the original vector (with no precision loss) if the length falls within the given range
6543    Control = XMVectorEqualInt(ControlMax, ControlMin);
6544    Result = XMVectorSelect(Result, V, Control);
6545
6546    return Result;
6547
6548#elif defined(_XM_SSE_INTRINSICS_)
6549    XMVECTOR ClampLength;
6550    XMVECTOR LengthSq;
6551    XMVECTOR RcpLength;
6552    XMVECTOR Length;
6553    XMVECTOR Normal;
6554    XMVECTOR InfiniteLength;
6555    XMVECTOR ZeroLength;
6556    XMVECTOR Select;
6557    XMVECTOR ControlMax;
6558    XMVECTOR ControlMin;
6559    XMVECTOR Control;
6560    XMVECTOR Result;
6561
6562    XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)));
6563    XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)));
6564    XMASSERT(XMVector2GreaterOrEqual(LengthMin, g_XMZero));
6565    XMASSERT(XMVector2GreaterOrEqual(LengthMax, g_XMZero));
6566    XMASSERT(XMVector2GreaterOrEqual(LengthMax, LengthMin));
6567    LengthSq = XMVector2LengthSq(V);
6568    RcpLength = XMVectorReciprocalSqrt(LengthSq);
6569    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
6570    ZeroLength = XMVectorEqual(LengthSq, g_XMZero);
6571    Length = _mm_mul_ps(LengthSq, RcpLength);
6572    Normal = _mm_mul_ps(V, RcpLength);
6573    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
6574    Length = XMVectorSelect(LengthSq, Length, Select);
6575    Normal = XMVectorSelect(LengthSq, Normal, Select);
6576    ControlMax = XMVectorGreater(Length, LengthMax);
6577    ControlMin = XMVectorLess(Length, LengthMin);
6578    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
6579    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
6580    Result = _mm_mul_ps(Normal, ClampLength);
6581    // Preserve the original vector (with no precision loss) if the length falls within the given range
6582    Control = XMVectorEqualInt(ControlMax, ControlMin);
6583    Result = XMVectorSelect(Result, V, Control);
6584    return Result;
6585#else // _XM_VMX128_INTRINSICS_
6586#endif // _XM_VMX128_INTRINSICS_
6587}
6588
6589//------------------------------------------------------------------------------
6590
6591XMFINLINE XMVECTOR XMVector2Reflect
6592(
6593    FXMVECTOR Incident,
6594    FXMVECTOR Normal
6595)
6596{
6597#if defined(_XM_NO_INTRINSICS_)
6598
6599    XMVECTOR Result;
6600
6601    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
6602    Result = XMVector2Dot(Incident, Normal);
6603    Result = XMVectorAdd(Result, Result);
6604    Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);
6605
6606    return Result;
6607
6608#elif defined(_XM_SSE_INTRINSICS_)
6609    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
6610    XMVECTOR Result = XMVector2Dot(Incident,Normal);
6611    Result = _mm_add_ps(Result, Result);
6612    Result = _mm_mul_ps(Result, Normal);
6613    Result = _mm_sub_ps(Incident,Result);
6614    return Result;
6615#else // _XM_VMX128_INTRINSICS_
6616#endif // _XM_VMX128_INTRINSICS_
6617}
6618
6619//------------------------------------------------------------------------------
6620
6621XMFINLINE XMVECTOR XMVector2Refract
6622(
6623    FXMVECTOR Incident,
6624    FXMVECTOR Normal,
6625    FLOAT    RefractionIndex
6626)
6627{
6628#if defined(_XM_NO_INTRINSICS_)
6629    XMVECTOR Index;
6630    Index = XMVectorReplicate(RefractionIndex);
6631    return XMVector2RefractV(Incident, Normal, Index);
6632
6633#elif defined(_XM_SSE_INTRINSICS_)
6634    XMVECTOR Index = _mm_set_ps1(RefractionIndex);
6635    return XMVector2RefractV(Incident,Normal,Index);
6636#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
6637#endif // _XM_VMX128_INTRINSICS_
6638}
6639
6640//------------------------------------------------------------------------------
6641
6642// Return the refraction of a 2D vector
6643XMFINLINE XMVECTOR XMVector2RefractV
6644(
6645    FXMVECTOR Incident,
6646    FXMVECTOR Normal,
6647    FXMVECTOR RefractionIndex
6648)
6649{
6650#if defined(_XM_NO_INTRINSICS_)
6651    float IDotN;
6652    float RX,RY;
6653    XMVECTOR vResult;
6654    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
6655    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
6656    IDotN = (Incident.vector4_f32[0]*Normal.vector4_f32[0])+(Incident.vector4_f32[1]*Normal.vector4_f32[1]);
6657    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
6658    RY = 1.0f-(IDotN*IDotN);
6659    RX = 1.0f-(RY*RefractionIndex.vector4_f32[0]*RefractionIndex.vector4_f32[0]);
6660    RY = 1.0f-(RY*RefractionIndex.vector4_f32[1]*RefractionIndex.vector4_f32[1]);
6661    if (RX>=0.0f) {
6662        RX = (RefractionIndex.vector4_f32[0]*Incident.vector4_f32[0])-(Normal.vector4_f32[0]*((RefractionIndex.vector4_f32[0]*IDotN)+sqrtf(RX)));
6663    } else {
6664        RX = 0.0f;
6665    }
6666    if (RY>=0.0f) {
6667        RY = (RefractionIndex.vector4_f32[1]*Incident.vector4_f32[1])-(Normal.vector4_f32[1]*((RefractionIndex.vector4_f32[1]*IDotN)+sqrtf(RY)));
6668    } else {
6669        RY = 0.0f;
6670    }
6671    vResult.vector4_f32[0] = RX;
6672    vResult.vector4_f32[1] = RY;
6673    vResult.vector4_f32[2] = 0.0f;
6674    vResult.vector4_f32[3] = 0.0f;
6675    return vResult;
6676#elif defined(_XM_SSE_INTRINSICS_)
6677    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
6678    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
6679    // Get the 2D Dot product of Incident-Normal
6680    XMVECTOR IDotN = _mm_mul_ps(Incident,Normal);
6681    XMVECTOR vTemp = _mm_shuffle_ps(IDotN,IDotN,_MM_SHUFFLE(1,1,1,1));
6682    IDotN = _mm_add_ss(IDotN,vTemp);
6683    IDotN = _mm_shuffle_ps(IDotN,IDotN,_MM_SHUFFLE(0,0,0,0));
6684    // vTemp = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
6685    vTemp = _mm_mul_ps(IDotN,IDotN);
6686    vTemp = _mm_sub_ps(g_XMOne,vTemp);
6687    vTemp = _mm_mul_ps(vTemp,RefractionIndex);
6688    vTemp = _mm_mul_ps(vTemp,RefractionIndex);
6689    vTemp = _mm_sub_ps(g_XMOne,vTemp);
6690    // If any terms are <=0, sqrt() will fail, punt to zero
6691    XMVECTOR vMask = _mm_cmpgt_ps(vTemp,g_XMZero);
6692    // R = RefractionIndex * IDotN + sqrt(R)
6693    vTemp = _mm_sqrt_ps(vTemp);
6694    XMVECTOR vResult = _mm_mul_ps(RefractionIndex,IDotN);
6695    vTemp = _mm_add_ps(vTemp,vResult);
6696    // Result = RefractionIndex * Incident - Normal * R
6697    vResult = _mm_mul_ps(RefractionIndex,Incident);
6698    vTemp = _mm_mul_ps(vTemp,Normal);
6699    vResult = _mm_sub_ps(vResult,vTemp);
6700    vResult = _mm_and_ps(vResult,vMask);
6701    return vResult;
6702#else // _XM_VMX128_INTRINSICS_
6703#endif // _XM_VMX128_INTRINSICS_
6704}
6705
6706//------------------------------------------------------------------------------
6707
6708XMFINLINE XMVECTOR XMVector2Orthogonal
6709(
6710    FXMVECTOR V
6711)
6712{
6713#if defined(_XM_NO_INTRINSICS_)
6714
6715    XMVECTOR Result;
6716
6717    Result.vector4_f32[0] = -V.vector4_f32[1];
6718    Result.vector4_f32[1] = V.vector4_f32[0];
6719
6720    return Result;
6721
6722#elif defined(_XM_SSE_INTRINSICS_)
6723    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
6724    vResult = _mm_mul_ps(vResult,g_XMNegateX);
6725    return vResult;
6726#else // _XM_VMX128_INTRINSICS_
6727#endif // _XM_VMX128_INTRINSICS_
6728}
6729
6730//------------------------------------------------------------------------------
6731
6732XMFINLINE XMVECTOR XMVector2AngleBetweenNormalsEst
6733(
6734    FXMVECTOR N1,
6735    FXMVECTOR N2
6736)
6737{
6738#if defined(_XM_NO_INTRINSICS_)
6739
6740    XMVECTOR NegativeOne;
6741    XMVECTOR One;
6742    XMVECTOR Result;
6743
6744    Result = XMVector2Dot(N1, N2);
6745    NegativeOne = XMVectorSplatConstant(-1, 0);
6746    One = XMVectorSplatOne();
6747    Result = XMVectorClamp(Result, NegativeOne, One);
6748    Result = XMVectorACosEst(Result);
6749
6750    return Result;
6751
6752#elif defined(_XM_SSE_INTRINSICS_)
6753    XMVECTOR vResult = XMVector2Dot(N1,N2);
6754    // Clamp to -1.0f to 1.0f
6755	vResult = _mm_max_ps(vResult,g_XMNegativeOne);
6756	vResult = _mm_min_ps(vResult,g_XMOne);;
6757    vResult = XMVectorACosEst(vResult);
6758    return vResult;
6759#else // _XM_VMX128_INTRINSICS_
6760#endif // _XM_VMX128_INTRINSICS_
6761}
6762
6763//------------------------------------------------------------------------------
6764
6765XMFINLINE XMVECTOR XMVector2AngleBetweenNormals
6766(
6767    FXMVECTOR N1,
6768    FXMVECTOR N2
6769)
6770{
6771#if defined(_XM_NO_INTRINSICS_)
6772
6773    XMVECTOR NegativeOne;
6774    XMVECTOR One;
6775    XMVECTOR Result;
6776
6777    Result = XMVector2Dot(N1, N2);
6778    NegativeOne = XMVectorSplatConstant(-1, 0);
6779    One = XMVectorSplatOne();
6780    Result = XMVectorClamp(Result, NegativeOne, One);
6781    Result = XMVectorACos(Result);
6782
6783    return Result;
6784
6785#elif defined(_XM_SSE_INTRINSICS_)
6786    XMVECTOR vResult = XMVector2Dot(N1,N2);
6787    // Clamp to -1.0f to 1.0f
6788	vResult = _mm_max_ps(vResult,g_XMNegativeOne);
6789	vResult = _mm_min_ps(vResult,g_XMOne);;
6790    vResult = XMVectorACos(vResult);
6791    return vResult;
6792#else // _XM_VMX128_INTRINSICS_
6793#endif // _XM_VMX128_INTRINSICS_
6794}
6795
6796//------------------------------------------------------------------------------
6797
6798XMFINLINE XMVECTOR XMVector2AngleBetweenVectors
6799(
6800    FXMVECTOR V1,
6801    FXMVECTOR V2
6802)
6803{
6804#if defined(_XM_NO_INTRINSICS_)
6805
6806    XMVECTOR L1;
6807    XMVECTOR L2;
6808    XMVECTOR Dot;
6809    XMVECTOR CosAngle;
6810    XMVECTOR NegativeOne;
6811    XMVECTOR One;
6812    XMVECTOR Result;
6813
6814    L1 = XMVector2ReciprocalLength(V1);
6815    L2 = XMVector2ReciprocalLength(V2);
6816
6817    Dot = XMVector2Dot(V1, V2);
6818
6819    L1 = XMVectorMultiply(L1, L2);
6820
6821    CosAngle = XMVectorMultiply(Dot, L1);
6822    NegativeOne = XMVectorSplatConstant(-1, 0);
6823    One = XMVectorSplatOne();
6824    CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);
6825
6826    Result = XMVectorACos(CosAngle);
6827
6828    return Result;
6829
6830#elif defined(_XM_SSE_INTRINSICS_)
6831    XMVECTOR L1;
6832    XMVECTOR L2;
6833    XMVECTOR Dot;
6834    XMVECTOR CosAngle;
6835    XMVECTOR Result;
6836    L1 = XMVector2ReciprocalLength(V1);
6837    L2 = XMVector2ReciprocalLength(V2);
6838    Dot = XMVector2Dot(V1, V2);
6839    L1 = _mm_mul_ps(L1, L2);
6840    CosAngle = _mm_mul_ps(Dot, L1);
6841    CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne,g_XMOne);
6842    Result = XMVectorACos(CosAngle);
6843    return Result;
6844#else // _XM_VMX128_INTRINSICS_
6845#endif // _XM_VMX128_INTRINSICS_
6846}
6847
6848//------------------------------------------------------------------------------
6849
6850XMFINLINE XMVECTOR XMVector2LinePointDistance
6851(
6852    FXMVECTOR LinePoint1,
6853    FXMVECTOR LinePoint2,
6854    FXMVECTOR Point
6855)
6856{
6857#if defined(_XM_NO_INTRINSICS_)
6858
6859    XMVECTOR PointVector;
6860    XMVECTOR LineVector;
6861    XMVECTOR ReciprocalLengthSq;
6862    XMVECTOR PointProjectionScale;
6863    XMVECTOR DistanceVector;
6864    XMVECTOR Result;
6865
6866    // Given a vector PointVector from LinePoint1 to Point and a vector
6867    // LineVector from LinePoint1 to LinePoint2, the scaled distance
6868    // PointProjectionScale from LinePoint1 to the perpendicular projection
6869    // of PointVector onto the line is defined as:
6870    //
6871    //     PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector)
6872
6873    PointVector = XMVectorSubtract(Point, LinePoint1);
6874    LineVector = XMVectorSubtract(LinePoint2, LinePoint1);
6875
6876    ReciprocalLengthSq = XMVector2LengthSq(LineVector);
6877    ReciprocalLengthSq = XMVectorReciprocal(ReciprocalLengthSq);
6878
6879    PointProjectionScale = XMVector2Dot(PointVector, LineVector);
6880    PointProjectionScale = XMVectorMultiply(PointProjectionScale, ReciprocalLengthSq);
6881
6882    DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale);
6883    DistanceVector = XMVectorSubtract(PointVector, DistanceVector);
6884
6885    Result = XMVector2Length(DistanceVector);
6886
6887    return Result;
6888
6889#elif defined(_XM_SSE_INTRINSICS_)
6890    XMVECTOR PointVector = _mm_sub_ps(Point,LinePoint1);
6891    XMVECTOR LineVector = _mm_sub_ps(LinePoint2,LinePoint1);
6892    XMVECTOR ReciprocalLengthSq = XMVector2LengthSq(LineVector);
6893    XMVECTOR vResult = XMVector2Dot(PointVector,LineVector);
6894    vResult = _mm_div_ps(vResult,ReciprocalLengthSq);
6895    vResult = _mm_mul_ps(vResult,LineVector);
6896    vResult = _mm_sub_ps(PointVector,vResult);
6897    vResult = XMVector2Length(vResult);
6898    return vResult;
6899#else // _XM_VMX128_INTRINSICS_
6900#endif // _XM_VMX128_INTRINSICS_
6901}
6902
6903//------------------------------------------------------------------------------
6904
6905XMFINLINE XMVECTOR XMVector2IntersectLine
6906(
6907    FXMVECTOR Line1Point1,
6908    FXMVECTOR Line1Point2,
6909    FXMVECTOR Line2Point1,
6910    CXMVECTOR Line2Point2
6911)
6912{
6913#if defined(_XM_NO_INTRINSICS_)
6914
6915    XMVECTOR        V1;
6916    XMVECTOR        V2;
6917    XMVECTOR        V3;
6918    XMVECTOR        C1;
6919    XMVECTOR        C2;
6920    XMVECTOR        Result;
6921    CONST XMVECTOR  Zero = XMVectorZero();
6922
6923    V1 = XMVectorSubtract(Line1Point2, Line1Point1);
6924    V2 = XMVectorSubtract(Line2Point2, Line2Point1);
6925    V3 = XMVectorSubtract(Line1Point1, Line2Point1);
6926
6927    C1 = XMVector2Cross(V1, V2);
6928    C2 = XMVector2Cross(V2, V3);
6929
6930    if (XMVector2NearEqual(C1, Zero, g_XMEpsilon.v))
6931    {
6932        if (XMVector2NearEqual(C2, Zero, g_XMEpsilon.v))
6933        {
6934            // Coincident
6935            Result = g_XMInfinity.v;
6936        }
6937        else
6938        {
6939            // Parallel
6940            Result = g_XMQNaN.v;
6941        }
6942    }
6943    else
6944    {
6945        // Intersection point = Line1Point1 + V1 * (C2 / C1)
6946        XMVECTOR Scale;
6947        Scale = XMVectorReciprocal(C1);
6948        Scale = XMVectorMultiply(C2, Scale);
6949        Result = XMVectorMultiplyAdd(V1, Scale, Line1Point1);
6950    }
6951
6952    return Result;
6953
6954#elif defined(_XM_SSE_INTRINSICS_)
6955    XMVECTOR V1 = _mm_sub_ps(Line1Point2, Line1Point1);
6956    XMVECTOR V2 = _mm_sub_ps(Line2Point2, Line2Point1);
6957    XMVECTOR V3 = _mm_sub_ps(Line1Point1, Line2Point1);
6958    // Generate the cross products
6959    XMVECTOR C1 = XMVector2Cross(V1, V2);
6960    XMVECTOR C2 = XMVector2Cross(V2, V3);
6961    // If C1 is not close to epsilon, use the calculated value
6962    XMVECTOR vResultMask = _mm_setzero_ps();
6963    vResultMask = _mm_sub_ps(vResultMask,C1);
6964    vResultMask = _mm_max_ps(vResultMask,C1);
6965    // 0xFFFFFFFF if the calculated value is to be used
6966    vResultMask = _mm_cmpgt_ps(vResultMask,g_XMEpsilon);
6967    // If C1 is close to epsilon, which fail type is it? INFINITY or NAN?
6968    XMVECTOR vFailMask = _mm_setzero_ps();
6969    vFailMask = _mm_sub_ps(vFailMask,C2);
6970    vFailMask = _mm_max_ps(vFailMask,C2);
6971    vFailMask = _mm_cmple_ps(vFailMask,g_XMEpsilon);
6972    XMVECTOR vFail = _mm_and_ps(vFailMask,g_XMInfinity);
6973    vFailMask = _mm_andnot_ps(vFailMask,g_XMQNaN);
6974    // vFail is NAN or INF
6975    vFail = _mm_or_ps(vFail,vFailMask);
6976    // Intersection point = Line1Point1 + V1 * (C2 / C1)
6977    XMVECTOR vResult = _mm_div_ps(C2,C1);
6978    vResult = _mm_mul_ps(vResult,V1);
6979    vResult = _mm_add_ps(vResult,Line1Point1);
6980    // Use result, or failure value
6981    vResult = _mm_and_ps(vResult,vResultMask);
6982    vResultMask = _mm_andnot_ps(vResultMask,vFail);
6983    vResult = _mm_or_ps(vResult,vResultMask);
6984    return vResult;
6985#else // _XM_VMX128_INTRINSICS_
6986#endif // _XM_VMX128_INTRINSICS_
6987}
6988
6989//------------------------------------------------------------------------------
6990
6991XMFINLINE XMVECTOR XMVector2Transform
6992(
6993    FXMVECTOR V,
6994    CXMMATRIX M
6995)
6996{
6997#if defined(_XM_NO_INTRINSICS_)
6998
6999    XMVECTOR X;
7000    XMVECTOR Y;
7001    XMVECTOR Result;
7002
7003    Y = XMVectorSplatY(V);
7004    X = XMVectorSplatX(V);
7005
7006    Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
7007    Result = XMVectorMultiplyAdd(X, M.r[0], Result);
7008
7009    return Result;
7010
7011#elif defined(_XM_SSE_INTRINSICS_)
7012    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
7013    vResult = _mm_mul_ps(vResult,M.r[0]);
7014    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
7015    vTemp = _mm_mul_ps(vTemp,M.r[1]);
7016    vResult = _mm_add_ps(vResult,vTemp);
7017    vResult = _mm_add_ps(vResult,M.r[3]);
7018    return vResult;
7019#else // _XM_VMX128_INTRINSICS_
7020#endif // _XM_VMX128_INTRINSICS_
7021}
7022
7023//------------------------------------------------------------------------------
7024
7025XMINLINE XMFLOAT4* XMVector2TransformStream
7026(
7027    XMFLOAT4*       pOutputStream,
7028    UINT            OutputStride,
7029    CONST XMFLOAT2* pInputStream,
7030    UINT            InputStride,
7031    UINT            VectorCount,
7032    CXMMATRIX        M
7033)
7034{
7035#if defined(_XM_NO_INTRINSICS_)
7036
7037    XMVECTOR V;
7038    XMVECTOR X;
7039    XMVECTOR Y;
7040    XMVECTOR Result;
7041    UINT     i;
7042    BYTE*    pInputVector = (BYTE*)pInputStream;
7043    BYTE*    pOutputVector = (BYTE*)pOutputStream;
7044
7045    XMASSERT(pOutputStream);
7046    XMASSERT(pInputStream);
7047
7048    for (i = 0; i < VectorCount; i++)
7049    {
7050        V = XMLoadFloat2((XMFLOAT2*)pInputVector);
7051        Y = XMVectorSplatY(V);
7052        X = XMVectorSplatX(V);
7053//        Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
7054//        X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);
7055
7056        Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
7057        Result = XMVectorMultiplyAdd(X, M.r[0], Result);
7058
7059        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);
7060
7061        pInputVector += InputStride;
7062        pOutputVector += OutputStride;
7063    }
7064
7065    return pOutputStream;
7066
7067#elif defined(_XM_SSE_INTRINSICS_)
7068	XMASSERT(pOutputStream);
7069	XMASSERT(pInputStream);
7070    UINT i;
7071    const BYTE* pInputVector = (const BYTE*)pInputStream;
7072    BYTE* pOutputVector = (BYTE*)pOutputStream;
7073
7074    for (i = 0; i < VectorCount; i++)
7075    {
7076        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->x);
7077        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->y);
7078        vResult = _mm_mul_ps(vResult,M.r[1]);
7079        vResult = _mm_add_ps(vResult,M.r[3]);
7080        X = _mm_mul_ps(X,M.r[0]);
7081        vResult = _mm_add_ps(vResult,X);
7082        _mm_storeu_ps(reinterpret_cast<float*>(pOutputVector),vResult);
7083        pInputVector += InputStride;
7084        pOutputVector += OutputStride;
7085    }
7086    return pOutputStream;
7087#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
7088#endif // _XM_VMX128_INTRINSICS_
7089}
7090
7091//------------------------------------------------------------------------------
7092
7093XMINLINE XMFLOAT4* XMVector2TransformStreamNC
7094(
7095    XMFLOAT4*       pOutputStream,
7096    UINT            OutputStride,
7097    CONST XMFLOAT2* pInputStream,
7098    UINT            InputStride,
7099    UINT            VectorCount,
7100    CXMMATRIX     M
7101)
7102{
7103#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_SSE_INTRINSICS_)
7104	return XMVector2TransformStream( pOutputStream, OutputStride, pInputStream, InputStride, VectorCount, M );
7105#else // _XM_VMX128_INTRINSICS_
7106#endif // _XM_VMX128_INTRINSICS_
7107}
7108
7109//------------------------------------------------------------------------------
7110
7111XMFINLINE XMVECTOR XMVector2TransformCoord
7112(
7113    FXMVECTOR V,
7114    CXMMATRIX M
7115)
7116{
7117#if defined(_XM_NO_INTRINSICS_)
7118
7119    XMVECTOR X;
7120    XMVECTOR Y;
7121    XMVECTOR InverseW;
7122    XMVECTOR Result;
7123
7124    Y = XMVectorSplatY(V);
7125    X = XMVectorSplatX(V);
7126
7127    Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
7128    Result = XMVectorMultiplyAdd(X, M.r[0], Result);
7129
7130    InverseW = XMVectorSplatW(Result);
7131    InverseW = XMVectorReciprocal(InverseW);
7132
7133    Result = XMVectorMultiply(Result, InverseW);
7134
7135    return Result;
7136
7137#elif defined(_XM_SSE_INTRINSICS_)
7138    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
7139    vResult = _mm_mul_ps(vResult,M.r[0]);
7140    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
7141    vTemp = _mm_mul_ps(vTemp,M.r[1]);
7142    vResult = _mm_add_ps(vResult,vTemp);
7143    vResult = _mm_add_ps(vResult,M.r[3]);
7144    vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
7145    vResult = _mm_div_ps(vResult,vTemp);
7146    return vResult;
7147#else // _XM_VMX128_INTRINSICS_
7148#endif // _XM_VMX128_INTRINSICS_
7149}
7150
7151//------------------------------------------------------------------------------
7152
7153XMINLINE XMFLOAT2* XMVector2TransformCoordStream
7154(
7155    XMFLOAT2*       pOutputStream,
7156    UINT            OutputStride,
7157    CONST XMFLOAT2* pInputStream,
7158    UINT            InputStride,
7159    UINT            VectorCount,
7160    CXMMATRIX     M
7161)
7162{
7163#if defined(_XM_NO_INTRINSICS_)
7164
7165    XMVECTOR V;
7166    XMVECTOR X;
7167    XMVECTOR Y;
7168    XMVECTOR InverseW;
7169    XMVECTOR Result;
7170    UINT     i;
7171    BYTE*    pInputVector = (BYTE*)pInputStream;
7172    BYTE*    pOutputVector = (BYTE*)pOutputStream;
7173
7174    XMASSERT(pOutputStream);
7175    XMASSERT(pInputStream);
7176
7177    for (i = 0; i < VectorCount; i++)
7178    {
7179        V = XMLoadFloat2((XMFLOAT2*)pInputVector);
7180        Y = XMVectorSplatY(V);
7181        X = XMVectorSplatX(V);
7182//        Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
7183//        X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);
7184
7185        Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
7186        Result = XMVectorMultiplyAdd(X, M.r[0], Result);
7187
7188        InverseW = XMVectorSplatW(Result);
7189        InverseW = XMVectorReciprocal(InverseW);
7190
7191        Result = XMVectorMultiply(Result, InverseW);
7192
7193        XMStoreFloat2((XMFLOAT2*)pOutputVector, Result);
7194
7195        pInputVector += InputStride;
7196        pOutputVector += OutputStride;
7197    }
7198
7199    return pOutputStream;
7200
7201#elif defined(_XM_SSE_INTRINSICS_)
7202    XMASSERT(pOutputStream);
7203    XMASSERT(pInputStream);
7204    UINT i;
7205    const BYTE *pInputVector = (BYTE*)pInputStream;
7206    BYTE *pOutputVector = (BYTE*)pOutputStream;
7207
7208    for (i = 0; i < VectorCount; i++)
7209    {
7210        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->x);
7211        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->y);
7212        vResult = _mm_mul_ps(vResult,M.r[1]);
7213        vResult = _mm_add_ps(vResult,M.r[3]);
7214        X = _mm_mul_ps(X,M.r[0]);
7215        vResult = _mm_add_ps(vResult,X);
7216        X = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
7217        vResult = _mm_div_ps(vResult,X);
7218        _mm_store_sd(reinterpret_cast<double *>(pOutputVector),reinterpret_cast<__m128d *>(&vResult)[0]);
7219        pInputVector += InputStride;
7220        pOutputVector += OutputStride;
7221    }
7222    return pOutputStream;
7223#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
7224#endif // _XM_VMX128_INTRINSICS_
7225}
7226
7227//------------------------------------------------------------------------------
7228
7229XMFINLINE XMVECTOR XMVector2TransformNormal
7230(
7231    FXMVECTOR V,
7232    CXMMATRIX M
7233)
7234{
7235#if defined(_XM_NO_INTRINSICS_)
7236
7237    XMVECTOR X;
7238    XMVECTOR Y;
7239    XMVECTOR Result;
7240
7241    Y = XMVectorSplatY(V);
7242    X = XMVectorSplatX(V);
7243
7244    Result = XMVectorMultiply(Y, M.r[1]);
7245    Result = XMVectorMultiplyAdd(X, M.r[0], Result);
7246
7247    return Result;
7248
7249#elif defined(_XM_SSE_INTRINSICS_)
7250    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
7251    vResult = _mm_mul_ps(vResult,M.r[0]);
7252    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
7253    vTemp = _mm_mul_ps(vTemp,M.r[1]);
7254    vResult = _mm_add_ps(vResult,vTemp);
7255    return vResult;
7256#else // _XM_VMX128_INTRINSICS_
7257#endif // _XM_VMX128_INTRINSICS_
7258}
7259
7260//------------------------------------------------------------------------------
7261
7262XMINLINE XMFLOAT2* XMVector2TransformNormalStream
7263(
7264    XMFLOAT2*       pOutputStream,
7265    UINT            OutputStride,
7266    CONST XMFLOAT2* pInputStream,
7267    UINT            InputStride,
7268    UINT            VectorCount,
7269    CXMMATRIX        M
7270)
7271{
7272#if defined(_XM_NO_INTRINSICS_)
7273
7274    XMVECTOR V;
7275    XMVECTOR X;
7276    XMVECTOR Y;
7277    XMVECTOR Result;
7278    UINT     i;
7279    BYTE*    pInputVector = (BYTE*)pInputStream;
7280    BYTE*    pOutputVector = (BYTE*)pOutputStream;
7281
7282    XMASSERT(pOutputStream);
7283    XMASSERT(pInputStream);
7284
7285    for (i = 0; i < VectorCount; i++)
7286    {
7287        V = XMLoadFloat2((XMFLOAT2*)pInputVector);
7288        Y = XMVectorSplatY(V);
7289        X = XMVectorSplatX(V);
7290//        Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
7291//        X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);
7292
7293        Result = XMVectorMultiply(Y, M.r[1]);
7294        Result = XMVectorMultiplyAdd(X, M.r[0], Result);
7295
7296        XMStoreFloat2((XMFLOAT2*)pOutputVector, Result);
7297
7298        pInputVector += InputStride;
7299        pOutputVector += OutputStride;
7300    }
7301
7302    return pOutputStream;
7303
7304#elif defined(_XM_SSE_INTRINSICS_)
7305    XMASSERT(pOutputStream);
7306    XMASSERT(pInputStream);
7307    UINT i;
7308    const BYTE*pInputVector = (const BYTE*)pInputStream;
7309    BYTE *pOutputVector = (BYTE*)pOutputStream;
7310    for (i = 0; i < VectorCount; i++)
7311    {
7312        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2 *>(pInputVector)->x);
7313        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2 *>(pInputVector)->y);
7314        vResult = _mm_mul_ps(vResult,M.r[1]);
7315        X = _mm_mul_ps(X,M.r[0]);
7316        vResult = _mm_add_ps(vResult,X);
7317        _mm_store_sd(reinterpret_cast<double*>(pOutputVector),reinterpret_cast<const __m128d *>(&vResult)[0]);
7318
7319        pInputVector += InputStride;
7320        pOutputVector += OutputStride;
7321    }
7322
7323    return pOutputStream;
7324#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
7325#endif // _XM_VMX128_INTRINSICS_
7326}
7327
7328/****************************************************************************
7329 *
7330 * 3D Vector
7331 *
7332 ****************************************************************************/
7333
7334//------------------------------------------------------------------------------
7335// Comparison operations
7336//------------------------------------------------------------------------------
7337
7338//------------------------------------------------------------------------------
7339
7340XMFINLINE BOOL XMVector3Equal
7341(
7342    FXMVECTOR V1,
7343    FXMVECTOR V2
7344)
7345{
7346#if defined(_XM_NO_INTRINSICS_)
7347    return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1]) && (V1.vector4_f32[2] == V2.vector4_f32[2])) != 0);
7348#elif defined(_XM_SSE_INTRINSICS_)
7349    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
7350    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
7351#else // _XM_VMX128_INTRINSICS_
7352    return XMComparisonAllTrue(XMVector3EqualR(V1, V2));
7353#endif
7354}
7355
7356//------------------------------------------------------------------------------
7357
7358XMFINLINE UINT XMVector3EqualR
7359(
7360    FXMVECTOR V1,
7361    FXMVECTOR V2
7362)
7363{
7364#if defined(_XM_NO_INTRINSICS_)
7365    UINT CR = 0;
7366    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
7367        (V1.vector4_f32[1] == V2.vector4_f32[1]) &&
7368        (V1.vector4_f32[2] == V2.vector4_f32[2]))
7369    {
7370        CR = XM_CRMASK_CR6TRUE;
7371    }
7372    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
7373        (V1.vector4_f32[1] != V2.vector4_f32[1]) &&
7374        (V1.vector4_f32[2] != V2.vector4_f32[2]))
7375    {
7376        CR = XM_CRMASK_CR6FALSE;
7377    }
7378    return CR;
7379#elif defined(_XM_SSE_INTRINSICS_)
7380    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
7381    int iTest = _mm_movemask_ps(vTemp)&7;
7382    UINT CR = 0;
7383    if (iTest==7)
7384    {
7385        CR = XM_CRMASK_CR6TRUE;
7386    }
7387    else if (!iTest)
7388    {
7389        CR = XM_CRMASK_CR6FALSE;
7390    }
7391    return CR;
7392#else // _XM_VMX128_INTRINSICS_
7393#endif // _XM_VMX128_INTRINSICS_
7394}
7395
7396//------------------------------------------------------------------------------
7397
7398XMFINLINE BOOL XMVector3EqualInt
7399(
7400    FXMVECTOR V1,
7401    FXMVECTOR V2
7402)
7403{
7404#if defined(_XM_NO_INTRINSICS_)
7405    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1]) && (V1.vector4_u32[2] == V2.vector4_u32[2])) != 0);
7406#elif defined(_XM_SSE_INTRINSICS_)
7407    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
7408    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7)==7) != 0);
7409#else // _XM_VMX128_INTRINSICS_
7410    return XMComparisonAllTrue(XMVector3EqualIntR(V1, V2));
7411#endif
7412}
7413
7414//------------------------------------------------------------------------------
7415
7416XMFINLINE UINT XMVector3EqualIntR
7417(
7418    FXMVECTOR V1,
7419    FXMVECTOR V2
7420)
7421{
7422#if defined(_XM_NO_INTRINSICS_)
7423    UINT CR = 0;
7424    if ((V1.vector4_u32[0] == V2.vector4_u32[0]) &&
7425        (V1.vector4_u32[1] == V2.vector4_u32[1]) &&
7426        (V1.vector4_u32[2] == V2.vector4_u32[2]))
7427    {
7428        CR = XM_CRMASK_CR6TRUE;
7429    }
7430    else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) &&
7431        (V1.vector4_u32[1] != V2.vector4_u32[1]) &&
7432        (V1.vector4_u32[2] != V2.vector4_u32[2]))
7433    {
7434        CR = XM_CRMASK_CR6FALSE;
7435    }
7436    return CR;
7437#elif defined(_XM_SSE_INTRINSICS_)
7438    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
7439    int iTemp = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7;
7440    UINT CR = 0;
7441    if (iTemp==7)
7442    {
7443        CR = XM_CRMASK_CR6TRUE;
7444    }
7445    else if (!iTemp)
7446    {
7447        CR = XM_CRMASK_CR6FALSE;
7448    }
7449    return CR;
7450#else // _XM_VMX128_INTRINSICS_
7451#endif // _XM_VMX128_INTRINSICS_
7452}
7453
7454//------------------------------------------------------------------------------
7455
7456XMFINLINE BOOL XMVector3NearEqual
7457(
7458    FXMVECTOR V1,
7459    FXMVECTOR V2,
7460    FXMVECTOR Epsilon
7461)
7462{
7463#if defined(_XM_NO_INTRINSICS_)
7464    FLOAT dx, dy, dz;
7465
7466    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
7467    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
7468    dz = fabsf(V1.vector4_f32[2]-V2.vector4_f32[2]);
7469    return (((dx <= Epsilon.vector4_f32[0]) &&
7470            (dy <= Epsilon.vector4_f32[1]) &&
7471            (dz <= Epsilon.vector4_f32[2])) != 0);
7472#elif defined(_XM_SSE_INTRINSICS_)
7473    // Get the difference
7474    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
7475    // Get the absolute value of the difference
7476    XMVECTOR vTemp = _mm_setzero_ps();
7477    vTemp = _mm_sub_ps(vTemp,vDelta);
7478    vTemp = _mm_max_ps(vTemp,vDelta);
7479    vTemp = _mm_cmple_ps(vTemp,Epsilon);
7480    // w is don't care
7481    return (((_mm_movemask_ps(vTemp)&7)==0x7) != 0);
7482#else // _XM_VMX128_INTRINSICS_
7483#endif // _XM_VMX128_INTRINSICS_
7484}
7485
7486//------------------------------------------------------------------------------
7487
7488XMFINLINE BOOL XMVector3NotEqual
7489(
7490    FXMVECTOR V1,
7491    FXMVECTOR V2
7492)
7493{
7494#if defined(_XM_NO_INTRINSICS_)
7495    return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1]) || (V1.vector4_f32[2] != V2.vector4_f32[2])) != 0);
7496#elif defined(_XM_SSE_INTRINSICS_)
7497    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
7498    return (((_mm_movemask_ps(vTemp)&7)!=7) != 0);
7499#else // _XM_VMX128_INTRINSICS_
7500    return XMComparisonAnyFalse(XMVector3EqualR(V1, V2));
7501#endif
7502}
7503
7504//------------------------------------------------------------------------------
7505
7506XMFINLINE BOOL XMVector3NotEqualInt
7507(
7508    FXMVECTOR V1,
7509    FXMVECTOR V2
7510)
7511{
7512#if defined(_XM_NO_INTRINSICS_)
7513    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1]) || (V1.vector4_u32[2] != V2.vector4_u32[2])) != 0);
7514#elif defined(_XM_SSE_INTRINSICS_)
7515    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
7516    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7)!=7) != 0);
7517#else // _XM_VMX128_INTRINSICS_
7518    return XMComparisonAnyFalse(XMVector3EqualIntR(V1, V2));
7519#endif
7520}
7521
7522//------------------------------------------------------------------------------
7523
7524XMFINLINE BOOL XMVector3Greater
7525(
7526    FXMVECTOR V1,
7527    FXMVECTOR V2
7528)
7529{
7530#if defined(_XM_NO_INTRINSICS_)
7531    return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1]) && (V1.vector4_f32[2] > V2.vector4_f32[2])) != 0);
7532#elif defined(_XM_SSE_INTRINSICS_)
7533    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
7534    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
7535#else // _XM_VMX128_INTRINSICS_
7536    return XMComparisonAllTrue(XMVector3GreaterR(V1, V2));
7537#endif
7538}
7539
7540//------------------------------------------------------------------------------
7541
7542XMFINLINE UINT XMVector3GreaterR
7543(
7544    FXMVECTOR V1,
7545    FXMVECTOR V2
7546)
7547{
7548#if defined(_XM_NO_INTRINSICS_)
7549    UINT CR = 0;
7550    if ((V1.vector4_f32[0] > V2.vector4_f32[0]) &&
7551        (V1.vector4_f32[1] > V2.vector4_f32[1]) &&
7552        (V1.vector4_f32[2] > V2.vector4_f32[2]))
7553    {
7554        CR = XM_CRMASK_CR6TRUE;
7555    }
7556    else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) &&
7557        (V1.vector4_f32[1] <= V2.vector4_f32[1]) &&
7558        (V1.vector4_f32[2] <= V2.vector4_f32[2]))
7559    {
7560        CR = XM_CRMASK_CR6FALSE;
7561    }
7562    return CR;
7563
7564#elif defined(_XM_SSE_INTRINSICS_)
7565    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
7566    UINT CR = 0;
7567    int iTest = _mm_movemask_ps(vTemp)&7;
7568    if (iTest==7)
7569    {
7570        CR =  XM_CRMASK_CR6TRUE;
7571    }
7572    else if (!iTest)
7573    {
7574        CR = XM_CRMASK_CR6FALSE;
7575    }
7576    return CR;
7577#else // _XM_VMX128_INTRINSICS_
7578#endif // _XM_VMX128_INTRINSICS_
7579}
7580
7581//------------------------------------------------------------------------------
7582
7583XMFINLINE BOOL XMVector3GreaterOrEqual
7584(
7585    FXMVECTOR V1,
7586    FXMVECTOR V2
7587)
7588{
7589#if defined(_XM_NO_INTRINSICS_)
7590    return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1]) && (V1.vector4_f32[2] >= V2.vector4_f32[2])) != 0);
7591#elif defined(_XM_SSE_INTRINSICS_)
7592    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
7593    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
7594#else // _XM_VMX128_INTRINSICS_
7595    return XMComparisonAllTrue(XMVector3GreaterOrEqualR(V1, V2));
7596#endif
7597}
7598
7599//------------------------------------------------------------------------------
7600
7601XMFINLINE UINT XMVector3GreaterOrEqualR
7602(
7603    FXMVECTOR V1,
7604    FXMVECTOR V2
7605)
7606{
7607#if defined(_XM_NO_INTRINSICS_)
7608
7609    UINT CR = 0;
7610    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
7611        (V1.vector4_f32[1] >= V2.vector4_f32[1]) &&
7612        (V1.vector4_f32[2] >= V2.vector4_f32[2]))
7613    {
7614        CR = XM_CRMASK_CR6TRUE;
7615    }
7616    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
7617        (V1.vector4_f32[1] < V2.vector4_f32[1]) &&
7618        (V1.vector4_f32[2] < V2.vector4_f32[2]))
7619    {
7620        CR = XM_CRMASK_CR6FALSE;
7621    }
7622    return CR;
7623
7624#elif defined(_XM_SSE_INTRINSICS_)
7625    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
7626    UINT CR = 0;
7627    int iTest = _mm_movemask_ps(vTemp)&7;
7628    if (iTest==7)
7629    {
7630        CR =  XM_CRMASK_CR6TRUE;
7631    }
7632    else if (!iTest)
7633    {
7634        CR = XM_CRMASK_CR6FALSE;
7635    }
7636    return CR;
7637#else // _XM_VMX128_INTRINSICS_
7638#endif // _XM_VMX128_INTRINSICS_
7639}
7640
7641//------------------------------------------------------------------------------
7642
7643XMFINLINE BOOL XMVector3Less
7644(
7645    FXMVECTOR V1,
7646    FXMVECTOR V2
7647)
7648{
7649#if defined(_XM_NO_INTRINSICS_)
7650    return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1]) && (V1.vector4_f32[2] < V2.vector4_f32[2])) != 0);
7651#elif defined(_XM_SSE_INTRINSICS_)
7652    XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
7653    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
7654#else // _XM_VMX128_INTRINSICS_
7655    return XMComparisonAllTrue(XMVector3GreaterR(V2, V1));
7656#endif
7657}
7658
7659//------------------------------------------------------------------------------
7660
7661XMFINLINE BOOL XMVector3LessOrEqual
7662(
7663    FXMVECTOR V1,
7664    FXMVECTOR V2
7665)
7666{
7667#if defined(_XM_NO_INTRINSICS_)
7668    return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1]) && (V1.vector4_f32[2] <= V2.vector4_f32[2])) != 0);
7669#elif defined(_XM_SSE_INTRINSICS_)
7670    XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
7671    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
7672#else // _XM_VMX128_INTRINSICS_
7673    return XMComparisonAllTrue(XMVector3GreaterOrEqualR(V2, V1));
7674#endif
7675}
7676
7677//------------------------------------------------------------------------------
7678
7679XMFINLINE BOOL XMVector3InBounds
7680(
7681    FXMVECTOR V,
7682    FXMVECTOR Bounds
7683)
7684{
7685#if defined(_XM_NO_INTRINSICS_)
7686    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
7687        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
7688        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2])) != 0);
7689#elif defined(_XM_SSE_INTRINSICS_)
7690    // Test if less than or equal
7691    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
7692    // Negate the bounds
7693    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
7694    // Test if greater or equal (Reversed)
7695    vTemp2 = _mm_cmple_ps(vTemp2,V);
7696    // Blend answers
7697    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
7698    // x,y and z in bounds? (w is don't care)
7699    return (((_mm_movemask_ps(vTemp1)&0x7)==0x7) != 0);
7700#else
7701    return XMComparisonAllInBounds(XMVector3InBoundsR(V, Bounds));
7702#endif
7703}
7704
7705//------------------------------------------------------------------------------
7706
7707XMFINLINE UINT XMVector3InBoundsR
7708(
7709    FXMVECTOR V,
7710    FXMVECTOR Bounds
7711)
7712{
7713#if defined(_XM_NO_INTRINSICS_)
7714    UINT CR = 0;
7715    if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
7716        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
7717        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]))
7718    {
7719        CR = XM_CRMASK_CR6BOUNDS;
7720    }
7721    return CR;
7722
7723#elif defined(_XM_SSE_INTRINSICS_)
7724    // Test if less than or equal
7725    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
7726    // Negate the bounds
7727    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
7728    // Test if greater or equal (Reversed)
7729    vTemp2 = _mm_cmple_ps(vTemp2,V);
7730    // Blend answers
7731    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
7732    // x,y and z in bounds? (w is don't care)
7733    return ((_mm_movemask_ps(vTemp1)&0x7)==0x7) ? XM_CRMASK_CR6BOUNDS : 0;
7734#else // _XM_VMX128_INTRINSICS_
7735#endif // _XM_VMX128_INTRINSICS_
7736}
7737
7738//------------------------------------------------------------------------------
7739
7740XMFINLINE BOOL XMVector3IsNaN
7741(
7742    FXMVECTOR V
7743)
7744{
7745#if defined(_XM_NO_INTRINSICS_)
7746
7747    return (XMISNAN(V.vector4_f32[0]) ||
7748            XMISNAN(V.vector4_f32[1]) ||
7749            XMISNAN(V.vector4_f32[2]));
7750
7751#elif defined(_XM_SSE_INTRINSICS_)
7752    // Mask off the exponent
7753    __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
7754    // Mask off the mantissa
7755    __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
7756    // Are any of the exponents == 0x7F800000?
7757    vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
7758    // Are any of the mantissa's zero? (SSE2 doesn't have a neq test)
7759    vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
7760    // Perform a not on the NaN test to be true on NON-zero mantissas
7761    vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
7762    // If x, y or z are NaN, the signs are true after the merge above
7763    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTempNan)[0])&7) != 0);
7764#else // _XM_VMX128_INTRINSICS_
7765#endif // _XM_VMX128_INTRINSICS_
7766}
7767
7768//------------------------------------------------------------------------------
7769
7770XMFINLINE BOOL XMVector3IsInfinite
7771(
7772    FXMVECTOR V
7773)
7774{
7775#if defined(_XM_NO_INTRINSICS_)
7776    return (XMISINF(V.vector4_f32[0]) ||
7777            XMISINF(V.vector4_f32[1]) ||
7778            XMISINF(V.vector4_f32[2]));
7779#elif defined(_XM_SSE_INTRINSICS_)
7780    // Mask off the sign bit
7781    __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
7782    // Compare to infinity
7783    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
7784    // If x,y or z are infinity, the signs are true.
7785    return ((_mm_movemask_ps(vTemp)&7) != 0);
7786#else // _XM_VMX128_INTRINSICS_
7787#endif // _XM_VMX128_INTRINSICS_
7788}
7789
7790//------------------------------------------------------------------------------
7791// Computation operations
7792//------------------------------------------------------------------------------
7793
7794//------------------------------------------------------------------------------
7795
7796XMFINLINE XMVECTOR XMVector3Dot
7797(
7798    FXMVECTOR V1,
7799    FXMVECTOR V2
7800)
7801{
7802#if defined(_XM_NO_INTRINSICS_)
7803    FLOAT fValue = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1] + V1.vector4_f32[2] * V2.vector4_f32[2];
7804    XMVECTOR vResult = {
7805        fValue,
7806        fValue,
7807        fValue,
7808        fValue
7809    };
7810    return vResult;
7811
7812#elif defined(_XM_SSE_INTRINSICS_)
7813    // Perform the dot product
7814    XMVECTOR vDot = _mm_mul_ps(V1,V2);
7815    // x=Dot.vector4_f32[1], y=Dot.vector4_f32[2]
7816    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
7817    // Result.vector4_f32[0] = x+y
7818    vDot = _mm_add_ss(vDot,vTemp);
7819    // x=Dot.vector4_f32[2]
7820    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
7821    // Result.vector4_f32[0] = (x+y)+z
7822    vDot = _mm_add_ss(vDot,vTemp);
7823    // Splat x
7824	return _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
7825#else // _XM_VMX128_INTRINSICS_
7826#endif // _XM_VMX128_INTRINSICS_
7827}
7828
7829//------------------------------------------------------------------------------
7830
7831XMFINLINE XMVECTOR XMVector3Cross
7832(
7833    FXMVECTOR V1,
7834    FXMVECTOR V2
7835)
7836{
7837#if defined(_XM_NO_INTRINSICS_)
7838    XMVECTOR vResult = {
7839        (V1.vector4_f32[1] * V2.vector4_f32[2]) - (V1.vector4_f32[2] * V2.vector4_f32[1]),
7840        (V1.vector4_f32[2] * V2.vector4_f32[0]) - (V1.vector4_f32[0] * V2.vector4_f32[2]),
7841        (V1.vector4_f32[0] * V2.vector4_f32[1]) - (V1.vector4_f32[1] * V2.vector4_f32[0]),
7842        0.0f
7843    };
7844    return vResult;
7845
7846#elif defined(_XM_SSE_INTRINSICS_)
7847    // y1,z1,x1,w1
7848    XMVECTOR vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(3,0,2,1));
7849    // z2,x2,y2,w2
7850    XMVECTOR vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(3,1,0,2));
7851    // Perform the left operation
7852    XMVECTOR vResult = _mm_mul_ps(vTemp1,vTemp2);
7853    // z1,x1,y1,w1
7854    vTemp1 = _mm_shuffle_ps(vTemp1,vTemp1,_MM_SHUFFLE(3,0,2,1));
7855    // y2,z2,x2,w2
7856    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(3,1,0,2));
7857    // Perform the right operation
7858    vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
7859    // Subract the right from left, and return answer
7860    vResult = _mm_sub_ps(vResult,vTemp1);
7861    // Set w to zero
7862    return _mm_and_ps(vResult,g_XMMask3);
7863#else // _XM_VMX128_INTRINSICS_
7864#endif // _XM_VMX128_INTRINSICS_
7865}
7866
7867//------------------------------------------------------------------------------
7868
7869XMFINLINE XMVECTOR XMVector3LengthSq
7870(
7871    FXMVECTOR V
7872)
7873{
7874    return XMVector3Dot(V, V);
7875}
7876
7877//------------------------------------------------------------------------------
7878
7879XMFINLINE XMVECTOR XMVector3ReciprocalLengthEst
7880(
7881    FXMVECTOR V
7882)
7883{
7884#if defined(_XM_NO_INTRINSICS_)
7885
7886    XMVECTOR Result;
7887
7888    Result = XMVector3LengthSq(V);
7889    Result = XMVectorReciprocalSqrtEst(Result);
7890
7891    return Result;
7892
7893#elif defined(_XM_SSE_INTRINSICS_)
7894    // Perform the dot product on x,y and z
7895    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
7896    // vTemp has z and y
7897    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
7898    // x+z, y
7899    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
7900    // y,y,y,y
7901    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
7902    // x+z+y,??,??,??
7903    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
7904    // Splat the length squared
7905	vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
7906    // Get the reciprocal
7907    vLengthSq = _mm_rsqrt_ps(vLengthSq);
7908    return vLengthSq;
7909#else // _XM_VMX128_INTRINSICS_
7910#endif // _XM_VMX128_INTRINSICS_
7911}
7912
7913//------------------------------------------------------------------------------
7914
7915XMFINLINE XMVECTOR XMVector3ReciprocalLength
7916(
7917    FXMVECTOR V
7918)
7919{
7920#if defined(_XM_NO_INTRINSICS_)
7921
7922    XMVECTOR Result;
7923
7924    Result = XMVector3LengthSq(V);
7925    Result = XMVectorReciprocalSqrt(Result);
7926
7927    return Result;
7928
7929#elif defined(_XM_SSE_INTRINSICS_)
7930     // Perform the dot product
7931    XMVECTOR vDot = _mm_mul_ps(V,V);
7932    // x=Dot.y, y=Dot.z
7933    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
7934    // Result.x = x+y
7935    vDot = _mm_add_ss(vDot,vTemp);
7936    // x=Dot.z
7937    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
7938    // Result.x = (x+y)+z
7939    vDot = _mm_add_ss(vDot,vTemp);
7940    // Splat x
7941	vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
7942    // Get the reciprocal
7943    vDot = _mm_sqrt_ps(vDot);
7944    // Get the reciprocal
7945    vDot = _mm_div_ps(g_XMOne,vDot);
7946    return vDot;
7947#else // _XM_VMX128_INTRINSICS_
7948#endif // _XM_VMX128_INTRINSICS_
7949}
7950
7951//------------------------------------------------------------------------------
7952
7953XMFINLINE XMVECTOR XMVector3LengthEst
7954(
7955    FXMVECTOR V
7956)
7957{
7958#if defined(_XM_NO_INTRINSICS_)
7959
7960    XMVECTOR Result;
7961
7962    Result = XMVector3LengthSq(V);
7963    Result = XMVectorSqrtEst(Result);
7964
7965    return Result;
7966
7967#elif defined(_XM_SSE_INTRINSICS_)
7968    // Perform the dot product on x,y and z
7969    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
7970    // vTemp has z and y
7971    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
7972    // x+z, y
7973    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
7974    // y,y,y,y
7975    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
7976    // x+z+y,??,??,??
7977    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
7978    // Splat the length squared
7979	vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
7980    // Get the length
7981    vLengthSq = _mm_sqrt_ps(vLengthSq);
7982    return vLengthSq;
7983#else // _XM_VMX128_INTRINSICS_
7984#endif // _XM_VMX128_INTRINSICS_
7985}
7986
7987//------------------------------------------------------------------------------
7988
7989XMFINLINE XMVECTOR XMVector3Length
7990(
7991    FXMVECTOR V
7992)
7993{
7994#if defined(_XM_NO_INTRINSICS_)
7995
7996    XMVECTOR Result;
7997
7998    Result = XMVector3LengthSq(V);
7999    Result = XMVectorSqrt(Result);
8000
8001    return Result;
8002
8003#elif defined(_XM_SSE_INTRINSICS_)
8004    // Perform the dot product on x,y and z
8005    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
8006    // vTemp has z and y
8007    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
8008    // x+z, y
8009    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
8010    // y,y,y,y
8011    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
8012    // x+z+y,??,??,??
8013    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
8014    // Splat the length squared
8015	vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
8016    // Get the length
8017    vLengthSq = _mm_sqrt_ps(vLengthSq);
8018    return vLengthSq;
8019#else // _XM_VMX128_INTRINSICS_
8020#endif // _XM_VMX128_INTRINSICS_
8021}
8022
8023//------------------------------------------------------------------------------
8024// XMVector3NormalizeEst uses a reciprocal estimate and
8025// returns QNaN on zero and infinite vectors.
8026
8027XMFINLINE XMVECTOR XMVector3NormalizeEst
8028(
8029    FXMVECTOR V
8030)
8031{
8032#if defined(_XM_NO_INTRINSICS_)
8033
8034    XMVECTOR Result;
8035    Result = XMVector3ReciprocalLength(V);
8036    Result = XMVectorMultiply(V, Result);
8037    return Result;
8038
8039#elif defined(_XM_SSE_INTRINSICS_)
8040     // Perform the dot product
8041    XMVECTOR vDot = _mm_mul_ps(V,V);
8042    // x=Dot.y, y=Dot.z
8043    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
8044    // Result.x = x+y
8045    vDot = _mm_add_ss(vDot,vTemp);
8046    // x=Dot.z
8047    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
8048    // Result.x = (x+y)+z
8049    vDot = _mm_add_ss(vDot,vTemp);
8050    // Splat x
8051	vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
8052    // Get the reciprocal
8053    vDot = _mm_rsqrt_ps(vDot);
8054    // Perform the normalization
8055    vDot = _mm_mul_ps(vDot,V);
8056    return vDot;
8057#else // _XM_VMX128_INTRINSICS_
8058#endif // _XM_VMX128_INTRINSICS_
8059}
8060
8061//------------------------------------------------------------------------------
8062
8063XMFINLINE XMVECTOR XMVector3Normalize
8064(
8065    FXMVECTOR V
8066)
8067{
8068#if defined(_XM_NO_INTRINSICS_)
8069    FLOAT fLength;
8070    XMVECTOR vResult;
8071
8072    vResult = XMVector3Length( V );
8073    fLength = vResult.vector4_f32[0];
8074
8075    // Prevent divide by zero
8076    if (fLength > 0) {
8077        fLength = 1.0f/fLength;
8078    }
8079
8080    vResult.vector4_f32[0] = V.vector4_f32[0]*fLength;
8081    vResult.vector4_f32[1] = V.vector4_f32[1]*fLength;
8082    vResult.vector4_f32[2] = V.vector4_f32[2]*fLength;
8083    vResult.vector4_f32[3] = V.vector4_f32[3]*fLength;
8084    return vResult;
8085
8086#elif defined(_XM_SSE_INTRINSICS_)
8087    // Perform the dot product on x,y and z only
8088    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
8089    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1));
8090    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
8091    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
8092    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
8093	vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
8094    // Prepare for the division
8095    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
8096    // Create zero with a single instruction
8097    XMVECTOR vZeroMask = _mm_setzero_ps();
8098    // Test for a divide by zero (Must be FP to detect -0.0)
8099    vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
8100    // Failsafe on zero (Or epsilon) length planes
8101    // If the length is infinity, set the elements to zero
8102    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
8103    // Divide to perform the normalization
8104    vResult = _mm_div_ps(V,vResult);
8105    // Any that are infinity, set to zero
8106    vResult = _mm_and_ps(vResult,vZeroMask);
8107    // Select qnan or result based on infinite length
8108	XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
8109    XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
8110    vResult = _mm_or_ps(vTemp1,vTemp2);
8111    return vResult;
8112#else // _XM_VMX128_INTRINSICS_
8113#endif // _XM_VMX128_INTRINSICS_
8114}
8115
8116//------------------------------------------------------------------------------
8117
8118XMFINLINE XMVECTOR XMVector3ClampLength
8119(
8120    FXMVECTOR V,
8121    FLOAT    LengthMin,
8122    FLOAT    LengthMax
8123)
8124{
8125#if defined(_XM_NO_INTRINSICS_)
8126
8127    XMVECTOR ClampMax;
8128    XMVECTOR ClampMin;
8129
8130    ClampMax = XMVectorReplicate(LengthMax);
8131    ClampMin = XMVectorReplicate(LengthMin);
8132
8133    return XMVector3ClampLengthV(V, ClampMin, ClampMax);
8134
8135#elif defined(_XM_SSE_INTRINSICS_)
8136    XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
8137    XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
8138    return XMVector3ClampLengthV(V,ClampMin,ClampMax);
8139#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
8140#endif // _XM_VMX128_INTRINSICS_
8141}
8142
8143//------------------------------------------------------------------------------
8144
8145XMFINLINE XMVECTOR XMVector3ClampLengthV
8146(
8147    FXMVECTOR V,
8148    FXMVECTOR LengthMin,
8149    FXMVECTOR LengthMax
8150)
8151{
8152#if defined(_XM_NO_INTRINSICS_)
8153
8154    XMVECTOR ClampLength;
8155    XMVECTOR LengthSq;
8156    XMVECTOR RcpLength;
8157    XMVECTOR Length;
8158    XMVECTOR Normal;
8159    XMVECTOR Zero;
8160    XMVECTOR InfiniteLength;
8161    XMVECTOR ZeroLength;
8162    XMVECTOR Select;
8163    XMVECTOR ControlMax;
8164    XMVECTOR ControlMin;
8165    XMVECTOR Control;
8166    XMVECTOR Result;
8167
8168    XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[2] == LengthMin.vector4_f32[0]));
8169    XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[2] == LengthMax.vector4_f32[0]));
8170    XMASSERT(XMVector3GreaterOrEqual(LengthMin, XMVectorZero()));
8171    XMASSERT(XMVector3GreaterOrEqual(LengthMax, XMVectorZero()));
8172    XMASSERT(XMVector3GreaterOrEqual(LengthMax, LengthMin));
8173
8174    LengthSq = XMVector3LengthSq(V);
8175
8176    Zero = XMVectorZero();
8177
8178    RcpLength = XMVectorReciprocalSqrt(LengthSq);
8179
8180    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
8181    ZeroLength = XMVectorEqual(LengthSq, Zero);
8182
8183    Normal = XMVectorMultiply(V, RcpLength);
8184
8185    Length = XMVectorMultiply(LengthSq, RcpLength);
8186
8187    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
8188    Length = XMVectorSelect(LengthSq, Length, Select);
8189    Normal = XMVectorSelect(LengthSq, Normal, Select);
8190
8191    ControlMax = XMVectorGreater(Length, LengthMax);
8192    ControlMin = XMVectorLess(Length, LengthMin);
8193
8194    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
8195    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
8196
8197    Result = XMVectorMultiply(Normal, ClampLength);
8198
8199    // Preserve the original vector (with no precision loss) if the length falls within the given range
8200    Control = XMVectorEqualInt(ControlMax, ControlMin);
8201    Result = XMVectorSelect(Result, V, Control);
8202
8203    return Result;
8204
8205#elif defined(_XM_SSE_INTRINSICS_)
8206    XMVECTOR ClampLength;
8207    XMVECTOR LengthSq;
8208    XMVECTOR RcpLength;
8209    XMVECTOR Length;
8210    XMVECTOR Normal;
8211    XMVECTOR InfiniteLength;
8212    XMVECTOR ZeroLength;
8213    XMVECTOR Select;
8214    XMVECTOR ControlMax;
8215    XMVECTOR ControlMin;
8216    XMVECTOR Control;
8217    XMVECTOR Result;
8218
8219    XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)));
8220    XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)));
8221    XMASSERT(XMVector3GreaterOrEqual(LengthMin, g_XMZero));
8222    XMASSERT(XMVector3GreaterOrEqual(LengthMax, g_XMZero));
8223    XMASSERT(XMVector3GreaterOrEqual(LengthMax, LengthMin));
8224
8225    LengthSq = XMVector3LengthSq(V);
8226    RcpLength = XMVectorReciprocalSqrt(LengthSq);
8227    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
8228    ZeroLength = XMVectorEqual(LengthSq,g_XMZero);
8229    Normal = _mm_mul_ps(V, RcpLength);
8230    Length = _mm_mul_ps(LengthSq, RcpLength);
8231    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
8232    Length = XMVectorSelect(LengthSq, Length, Select);
8233    Normal = XMVectorSelect(LengthSq, Normal, Select);
8234    ControlMax = XMVectorGreater(Length, LengthMax);
8235    ControlMin = XMVectorLess(Length, LengthMin);
8236    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
8237    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
8238    Result = _mm_mul_ps(Normal, ClampLength);
8239    // Preserve the original vector (with no precision loss) if the length falls within the given range
8240    Control = XMVectorEqualInt(ControlMax, ControlMin);
8241    Result = XMVectorSelect(Result, V, Control);
8242    return Result;
8243#else // _XM_VMX128_INTRINSICS_
8244#endif // _XM_VMX128_INTRINSICS_
8245}
8246
8247//------------------------------------------------------------------------------
8248
8249XMFINLINE XMVECTOR XMVector3Reflect
8250(
8251    FXMVECTOR Incident,
8252    FXMVECTOR Normal
8253)
8254{
8255#if defined(_XM_NO_INTRINSICS_)
8256
8257    XMVECTOR Result;
8258
8259    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
8260    Result = XMVector3Dot(Incident, Normal);
8261    Result = XMVectorAdd(Result, Result);
8262    Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);
8263
8264    return Result;
8265
8266#elif defined(_XM_SSE_INTRINSICS_)
8267    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
8268    XMVECTOR Result = XMVector3Dot(Incident, Normal);
8269    Result = _mm_add_ps(Result, Result);
8270    Result = _mm_mul_ps(Result, Normal);
8271    Result = _mm_sub_ps(Incident,Result);
8272    return Result;
8273#else // _XM_VMX128_INTRINSICS_
8274#endif // _XM_VMX128_INTRINSICS_
8275}
8276
8277//------------------------------------------------------------------------------
8278
8279XMFINLINE XMVECTOR XMVector3Refract
8280(
8281    FXMVECTOR Incident,
8282    FXMVECTOR Normal,
8283    FLOAT    RefractionIndex
8284)
8285{
8286#if defined(_XM_NO_INTRINSICS_)
8287
8288    XMVECTOR Index;
8289    Index = XMVectorReplicate(RefractionIndex);
8290    return XMVector3RefractV(Incident, Normal, Index);
8291
8292#elif defined(_XM_SSE_INTRINSICS_)
8293    XMVECTOR Index = _mm_set_ps1(RefractionIndex);
8294    return XMVector3RefractV(Incident,Normal,Index);
8295#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
8296#endif // _XM_VMX128_INTRINSICS_
8297}
8298
8299//------------------------------------------------------------------------------
8300
8301XMFINLINE XMVECTOR XMVector3RefractV
8302(
8303    FXMVECTOR Incident,
8304    FXMVECTOR Normal,
8305    FXMVECTOR RefractionIndex
8306)
8307{
8308#if defined(_XM_NO_INTRINSICS_)
8309
8310    XMVECTOR        IDotN;
8311    XMVECTOR        R;
8312    CONST XMVECTOR  Zero = XMVectorZero();
8313
8314    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
8315    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
8316
8317    IDotN = XMVector3Dot(Incident, Normal);
8318
8319    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
8320    R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v);
8321    R = XMVectorMultiply(R, RefractionIndex);
8322    R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v);
8323
8324    if (XMVector4LessOrEqual(R, Zero))
8325    {
8326        // Total internal reflection
8327        return Zero;
8328    }
8329    else
8330    {
8331        XMVECTOR Result;
8332
8333        // R = RefractionIndex * IDotN + sqrt(R)
8334        R = XMVectorSqrt(R);
8335        R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R);
8336
8337        // Result = RefractionIndex * Incident - Normal * R
8338        Result = XMVectorMultiply(RefractionIndex, Incident);
8339        Result = XMVectorNegativeMultiplySubtract(Normal, R, Result);
8340
8341        return Result;
8342    }
8343
8344#elif defined(_XM_SSE_INTRINSICS_)
8345    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
8346    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
8347    XMVECTOR IDotN = XMVector3Dot(Incident, Normal);
8348    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
8349    XMVECTOR R = _mm_mul_ps(IDotN, IDotN);
8350    R = _mm_sub_ps(g_XMOne,R);
8351    R = _mm_mul_ps(R, RefractionIndex);
8352    R = _mm_mul_ps(R, RefractionIndex);
8353    R = _mm_sub_ps(g_XMOne,R);
8354
8355    XMVECTOR vResult = _mm_cmple_ps(R,g_XMZero);
8356    if (_mm_movemask_ps(vResult)==0x0f)
8357    {
8358        // Total internal reflection
8359        vResult = g_XMZero;
8360    }
8361    else
8362    {
8363        // R = RefractionIndex * IDotN + sqrt(R)
8364        R = _mm_sqrt_ps(R);
8365        vResult = _mm_mul_ps(RefractionIndex,IDotN);
8366        R = _mm_add_ps(R,vResult);
8367        // Result = RefractionIndex * Incident - Normal * R
8368        vResult = _mm_mul_ps(RefractionIndex, Incident);
8369        R = _mm_mul_ps(R,Normal);
8370        vResult = _mm_sub_ps(vResult,R);
8371    }
8372    return vResult;
8373#else // _XM_VMX128_INTRINSICS_
8374#endif // _XM_VMX128_INTRINSICS_
8375}
8376
8377//------------------------------------------------------------------------------
8378
8379XMFINLINE XMVECTOR XMVector3Orthogonal
8380(
8381    FXMVECTOR V
8382)
8383{
8384#if defined(_XM_NO_INTRINSICS_)
8385
8386    XMVECTOR NegativeV;
8387    XMVECTOR Z, YZYY;
8388    XMVECTOR ZIsNegative, YZYYIsNegative;
8389    XMVECTOR S, D;
8390    XMVECTOR R0, R1;
8391    XMVECTOR Select;
8392    XMVECTOR Zero;
8393    XMVECTOR Result;
8394    static CONST XMVECTORU32 Permute1X0X0X0X = {XM_PERMUTE_1X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
8395    static CONST XMVECTORU32 Permute0Y0Z0Y0Y= {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Y};
8396
8397    Zero = XMVectorZero();
8398    Z = XMVectorSplatZ(V);
8399    YZYY = XMVectorPermute(V, V, Permute0Y0Z0Y0Y.v);
8400
8401    NegativeV = XMVectorSubtract(Zero, V);
8402
8403    ZIsNegative = XMVectorLess(Z, Zero);
8404    YZYYIsNegative = XMVectorLess(YZYY, Zero);
8405
8406    S = XMVectorAdd(YZYY, Z);
8407    D = XMVectorSubtract(YZYY, Z);
8408
8409    Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative);
8410
8411    R0 = XMVectorPermute(NegativeV, S, Permute1X0X0X0X.v);
8412    R1 = XMVectorPermute(V, D, Permute1X0X0X0X.v);
8413
8414    Result = XMVectorSelect(R1, R0, Select);
8415
8416    return Result;
8417
8418#elif defined(_XM_SSE_INTRINSICS_)
8419    XMVECTOR NegativeV;
8420    XMVECTOR Z, YZYY;
8421    XMVECTOR ZIsNegative, YZYYIsNegative;
8422    XMVECTOR S, D;
8423    XMVECTOR R0, R1;
8424    XMVECTOR Select;
8425    XMVECTOR Zero;
8426    XMVECTOR Result;
8427    static CONST XMVECTORI32 Permute1X0X0X0X = {XM_PERMUTE_1X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
8428    static CONST XMVECTORI32 Permute0Y0Z0Y0Y= {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Y};
8429
8430    Zero = XMVectorZero();
8431    Z = XMVectorSplatZ(V);
8432    YZYY = XMVectorPermute(V, V, Permute0Y0Z0Y0Y);
8433
8434    NegativeV = _mm_sub_ps(Zero, V);
8435
8436    ZIsNegative = XMVectorLess(Z, Zero);
8437    YZYYIsNegative = XMVectorLess(YZYY, Zero);
8438
8439    S = _mm_add_ps(YZYY, Z);
8440    D = _mm_sub_ps(YZYY, Z);
8441
8442    Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative);
8443
8444    R0 = XMVectorPermute(NegativeV, S, Permute1X0X0X0X);
8445    R1 = XMVectorPermute(V, D,Permute1X0X0X0X);
8446    Result = XMVectorSelect(R1, R0, Select);
8447    return Result;
8448#else // _XM_VMX128_INTRINSICS_
8449#endif // _XM_VMX128_INTRINSICS_
8450}
8451
8452//------------------------------------------------------------------------------
8453
8454XMFINLINE XMVECTOR XMVector3AngleBetweenNormalsEst
8455(
8456    FXMVECTOR N1,
8457    FXMVECTOR N2
8458)
8459{
8460#if defined(_XM_NO_INTRINSICS_)
8461
8462    XMVECTOR Result;
8463    XMVECTOR NegativeOne;
8464    XMVECTOR One;
8465
8466    Result = XMVector3Dot(N1, N2);
8467    NegativeOne = XMVectorSplatConstant(-1, 0);
8468    One = XMVectorSplatOne();
8469    Result = XMVectorClamp(Result, NegativeOne, One);
8470    Result = XMVectorACosEst(Result);
8471
8472    return Result;
8473
8474#elif defined(_XM_SSE_INTRINSICS_)
8475    XMVECTOR vResult = XMVector3Dot(N1,N2);
8476    // Clamp to -1.0f to 1.0f
8477    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
8478    vResult = _mm_min_ps(vResult,g_XMOne);
8479    vResult = XMVectorACosEst(vResult);
8480    return vResult;
8481#else // _XM_VMX128_INTRINSICS_
8482#endif // _XM_VMX128_INTRINSICS_
8483}
8484
8485//------------------------------------------------------------------------------
8486
8487XMFINLINE XMVECTOR XMVector3AngleBetweenNormals
8488(
8489    FXMVECTOR N1,
8490    FXMVECTOR N2
8491)
8492{
8493#if defined(_XM_NO_INTRINSICS_)
8494
8495    XMVECTOR Result;
8496    XMVECTOR NegativeOne;
8497    XMVECTOR One;
8498
8499    Result = XMVector3Dot(N1, N2);
8500    NegativeOne = XMVectorSplatConstant(-1, 0);
8501    One = XMVectorSplatOne();
8502    Result = XMVectorClamp(Result, NegativeOne, One);
8503    Result = XMVectorACos(Result);
8504
8505    return Result;
8506
8507#elif defined(_XM_SSE_INTRINSICS_)
8508    XMVECTOR vResult = XMVector3Dot(N1,N2);
8509    // Clamp to -1.0f to 1.0f
8510    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
8511    vResult = _mm_min_ps(vResult,g_XMOne);
8512    vResult = XMVectorACos(vResult);
8513    return vResult;
8514#else // _XM_VMX128_INTRINSICS_
8515#endif // _XM_VMX128_INTRINSICS_
8516}
8517
8518//------------------------------------------------------------------------------
8519
8520XMFINLINE XMVECTOR XMVector3AngleBetweenVectors
8521(
8522    FXMVECTOR V1,
8523    FXMVECTOR V2
8524)
8525{
8526#if defined(_XM_NO_INTRINSICS_)
8527
8528    XMVECTOR L1;
8529    XMVECTOR L2;
8530    XMVECTOR Dot;
8531    XMVECTOR CosAngle;
8532    XMVECTOR NegativeOne;
8533    XMVECTOR One;
8534    XMVECTOR Result;
8535
8536    L1 = XMVector3ReciprocalLength(V1);
8537    L2 = XMVector3ReciprocalLength(V2);
8538
8539    Dot = XMVector3Dot(V1, V2);
8540
8541    L1 = XMVectorMultiply(L1, L2);
8542
8543    NegativeOne = XMVectorSplatConstant(-1, 0);
8544    One = XMVectorSplatOne();
8545
8546    CosAngle = XMVectorMultiply(Dot, L1);
8547
8548    CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);
8549
8550    Result = XMVectorACos(CosAngle);
8551
8552    return Result;
8553
8554#elif defined(_XM_SSE_INTRINSICS_)
8555    XMVECTOR L1;
8556    XMVECTOR L2;
8557    XMVECTOR Dot;
8558    XMVECTOR CosAngle;
8559    XMVECTOR Result;
8560
8561    L1 = XMVector3ReciprocalLength(V1);
8562    L2 = XMVector3ReciprocalLength(V2);
8563    Dot = XMVector3Dot(V1, V2);
8564    L1 = _mm_mul_ps(L1, L2);
8565    CosAngle = _mm_mul_ps(Dot, L1);
8566    CosAngle = XMVectorClamp(CosAngle,g_XMNegativeOne,g_XMOne);
8567    Result = XMVectorACos(CosAngle);
8568    return Result;
8569#else // _XM_VMX128_INTRINSICS_
8570#endif // _XM_VMX128_INTRINSICS_
8571}
8572
8573//------------------------------------------------------------------------------
8574
8575XMFINLINE XMVECTOR XMVector3LinePointDistance
8576(
8577    FXMVECTOR LinePoint1,
8578    FXMVECTOR LinePoint2,
8579    FXMVECTOR Point
8580)
8581{
8582#if defined(_XM_NO_INTRINSICS_)
8583
8584    XMVECTOR PointVector;
8585    XMVECTOR LineVector;
8586    XMVECTOR ReciprocalLengthSq;
8587    XMVECTOR PointProjectionScale;
8588    XMVECTOR DistanceVector;
8589    XMVECTOR Result;
8590
8591    // Given a vector PointVector from LinePoint1 to Point and a vector
8592    // LineVector from LinePoint1 to LinePoint2, the scaled distance
8593    // PointProjectionScale from LinePoint1 to the perpendicular projection
8594    // of PointVector onto the line is defined as:
8595    //
8596    //     PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector)
8597
8598    PointVector = XMVectorSubtract(Point, LinePoint1);
8599    LineVector = XMVectorSubtract(LinePoint2, LinePoint1);
8600
8601    ReciprocalLengthSq = XMVector3LengthSq(LineVector);
8602    ReciprocalLengthSq = XMVectorReciprocal(ReciprocalLengthSq);
8603
8604    PointProjectionScale = XMVector3Dot(PointVector, LineVector);
8605    PointProjectionScale = XMVectorMultiply(PointProjectionScale, ReciprocalLengthSq);
8606
8607    DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale);
8608    DistanceVector = XMVectorSubtract(PointVector, DistanceVector);
8609
8610    Result = XMVector3Length(DistanceVector);
8611
8612    return Result;
8613
8614#elif defined(_XM_SSE_INTRINSICS_)
8615    XMVECTOR PointVector = _mm_sub_ps(Point,LinePoint1);
8616    XMVECTOR LineVector = _mm_sub_ps(LinePoint2,LinePoint1);
8617    XMVECTOR ReciprocalLengthSq = XMVector3LengthSq(LineVector);
8618    XMVECTOR vResult = XMVector3Dot(PointVector,LineVector);
8619    vResult = _mm_div_ps(vResult,ReciprocalLengthSq);
8620    vResult = _mm_mul_ps(vResult,LineVector);
8621    vResult = _mm_sub_ps(PointVector,vResult);
8622    vResult = XMVector3Length(vResult);
8623    return vResult;
8624#else // _XM_VMX128_INTRINSICS_
8625#endif // _XM_VMX128_INTRINSICS_
8626}
8627
8628//------------------------------------------------------------------------------
8629
8630XMFINLINE VOID XMVector3ComponentsFromNormal
8631(
8632    XMVECTOR* pParallel,
8633    XMVECTOR* pPerpendicular,
8634    FXMVECTOR  V,
8635    FXMVECTOR  Normal
8636)
8637{
8638#if defined(_XM_NO_INTRINSICS_)
8639
8640    XMVECTOR Parallel;
8641    XMVECTOR Scale;
8642
8643    XMASSERT(pParallel);
8644    XMASSERT(pPerpendicular);
8645
8646    Scale = XMVector3Dot(V, Normal);
8647
8648    Parallel = XMVectorMultiply(Normal, Scale);
8649
8650    *pParallel = Parallel;
8651    *pPerpendicular = XMVectorSubtract(V, Parallel);
8652
8653#elif defined(_XM_SSE_INTRINSICS_)
8654	XMASSERT(pParallel);
8655	XMASSERT(pPerpendicular);
8656    XMVECTOR Scale = XMVector3Dot(V, Normal);
8657    XMVECTOR Parallel = _mm_mul_ps(Normal,Scale);
8658    *pParallel = Parallel;
8659    *pPerpendicular = _mm_sub_ps(V,Parallel);
8660#else // _XM_VMX128_INTRINSICS_
8661#endif // _XM_VMX128_INTRINSICS_
8662}
8663
8664//------------------------------------------------------------------------------
8665// Transform a vector using a rotation expressed as a unit quaternion
8666
8667XMFINLINE XMVECTOR XMVector3Rotate
8668(
8669    FXMVECTOR V,
8670    FXMVECTOR RotationQuaternion
8671)
8672{
8673#if defined(_XM_NO_INTRINSICS_)
8674
8675    XMVECTOR A;
8676    XMVECTOR Q;
8677    XMVECTOR Result;
8678
8679    A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v);
8680    Q = XMQuaternionConjugate(RotationQuaternion);
8681    Result = XMQuaternionMultiply(Q, A);
8682    Result = XMQuaternionMultiply(Result, RotationQuaternion);
8683
8684    return Result;
8685
8686#elif defined(_XM_SSE_INTRINSICS_)
8687    XMVECTOR A;
8688    XMVECTOR Q;
8689    XMVECTOR Result;
8690
8691    A = _mm_and_ps(V,g_XMMask3);
8692    Q = XMQuaternionConjugate(RotationQuaternion);
8693    Result = XMQuaternionMultiply(Q, A);
8694    Result = XMQuaternionMultiply(Result, RotationQuaternion);
8695    return Result;
8696#else // _XM_VMX128_INTRINSICS_
8697#endif // _XM_VMX128_INTRINSICS_
8698}
8699
8700//------------------------------------------------------------------------------
8701// Transform a vector using the inverse of a rotation expressed as a unit quaternion
8702
8703XMFINLINE XMVECTOR XMVector3InverseRotate
8704(
8705    FXMVECTOR V,
8706    FXMVECTOR RotationQuaternion
8707)
8708{
8709#if defined(_XM_NO_INTRINSICS_)
8710
8711    XMVECTOR A;
8712    XMVECTOR Q;
8713    XMVECTOR Result;
8714
8715    A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v);
8716    Result = XMQuaternionMultiply(RotationQuaternion, A);
8717    Q = XMQuaternionConjugate(RotationQuaternion);
8718    Result = XMQuaternionMultiply(Result, Q);
8719
8720    return Result;
8721
8722#elif defined(_XM_SSE_INTRINSICS_)
8723    XMVECTOR A;
8724    XMVECTOR Q;
8725    XMVECTOR Result;
8726    A = _mm_and_ps(V,g_XMMask3);
8727    Result = XMQuaternionMultiply(RotationQuaternion, A);
8728    Q = XMQuaternionConjugate(RotationQuaternion);
8729    Result = XMQuaternionMultiply(Result, Q);
8730    return Result;
8731#else // _XM_VMX128_INTRINSICS_
8732#endif // _XM_VMX128_INTRINSICS_
8733}
8734
8735//------------------------------------------------------------------------------
8736
8737XMFINLINE XMVECTOR XMVector3Transform
8738(
8739    FXMVECTOR V,
8740    CXMMATRIX M
8741)
8742{
8743#if defined(_XM_NO_INTRINSICS_)
8744
8745    XMVECTOR X;
8746    XMVECTOR Y;
8747    XMVECTOR Z;
8748    XMVECTOR Result;
8749
8750    Z = XMVectorSplatZ(V);
8751    Y = XMVectorSplatY(V);
8752    X = XMVectorSplatX(V);
8753
8754    Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
8755    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
8756    Result = XMVectorMultiplyAdd(X, M.r[0], Result);
8757
8758    return Result;
8759
8760#elif defined(_XM_SSE_INTRINSICS_)
8761    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
8762    vResult = _mm_mul_ps(vResult,M.r[0]);
8763    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
8764    vTemp = _mm_mul_ps(vTemp,M.r[1]);
8765    vResult = _mm_add_ps(vResult,vTemp);
8766    vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
8767    vTemp = _mm_mul_ps(vTemp,M.r[2]);
8768    vResult = _mm_add_ps(vResult,vTemp);
8769    vResult = _mm_add_ps(vResult,M.r[3]);
8770    return vResult;
8771#else // _XM_VMX128_INTRINSICS_
8772#endif // _XM_VMX128_INTRINSICS_
8773}
8774
8775//------------------------------------------------------------------------------
8776
8777XMINLINE XMFLOAT4* XMVector3TransformStream
8778(
8779    XMFLOAT4*       pOutputStream,
8780    UINT            OutputStride,
8781    CONST XMFLOAT3* pInputStream,
8782    UINT            InputStride,
8783    UINT            VectorCount,
8784    CXMMATRIX     M
8785)
8786{
8787#if defined(_XM_NO_INTRINSICS_)
8788
8789    XMVECTOR V;
8790    XMVECTOR X;
8791    XMVECTOR Y;
8792    XMVECTOR Z;
8793    XMVECTOR Result;
8794    UINT     i;
8795    BYTE*    pInputVector = (BYTE*)pInputStream;
8796    BYTE*    pOutputVector = (BYTE*)pOutputStream;
8797
8798    XMASSERT(pOutputStream);
8799    XMASSERT(pInputStream);
8800
8801    for (i = 0; i < VectorCount; i++)
8802    {
8803        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
8804        Z = XMVectorSplatZ(V);
8805        Y = XMVectorSplatY(V);
8806        X = XMVectorSplatX(V);
8807
8808        Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
8809        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
8810        Result = XMVectorMultiplyAdd(X, M.r[0], Result);
8811
8812        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);
8813
8814        pInputVector += InputStride;
8815        pOutputVector += OutputStride;
8816    }
8817
8818    return pOutputStream;
8819
8820#elif defined(_XM_SSE_INTRINSICS_)
8821    XMASSERT(pOutputStream);
8822    XMASSERT(pInputStream);
8823    UINT     i;
8824    const BYTE* pInputVector = (const BYTE*)pInputStream;
8825    BYTE*    pOutputVector = (BYTE*)pOutputStream;
8826
8827    for (i = 0; i < VectorCount; i++)
8828    {
8829        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
8830        XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
8831        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
8832        vResult = _mm_mul_ps(vResult,M.r[2]);
8833        vResult = _mm_add_ps(vResult,M.r[3]);
8834        Y = _mm_mul_ps(Y,M.r[1]);
8835        vResult = _mm_add_ps(vResult,Y);
8836        X = _mm_mul_ps(X,M.r[0]);
8837        vResult = _mm_add_ps(vResult,X);
8838        _mm_storeu_ps(reinterpret_cast<float *>(pOutputVector),vResult);
8839        pInputVector += InputStride;
8840        pOutputVector += OutputStride;
8841    }
8842
8843    return pOutputStream;
8844#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
8845#endif // _XM_VMX128_INTRINSICS_
8846}
8847
8848//------------------------------------------------------------------------------
8849
8850XMINLINE XMFLOAT4* XMVector3TransformStreamNC
8851(
8852    XMFLOAT4*       pOutputStream,
8853    UINT            OutputStride,
8854    CONST XMFLOAT3* pInputStream,
8855    UINT            InputStride,
8856    UINT            VectorCount,
8857    CXMMATRIX     M
8858)
8859{
8860#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_SSE_INTRINSICS_)
8861	return XMVector3TransformStream( pOutputStream, OutputStride, pInputStream, InputStride, VectorCount, M );
8862#else // _XM_VMX128_INTRINSICS_
8863#endif // _XM_VMX128_INTRINSICS_
8864}
8865
8866//------------------------------------------------------------------------------
8867
8868XMFINLINE XMVECTOR XMVector3TransformCoord
8869(
8870    FXMVECTOR V,
8871    CXMMATRIX M
8872)
8873{
8874#if defined(_XM_NO_INTRINSICS_)
8875
8876    XMVECTOR X;
8877    XMVECTOR Y;
8878    XMVECTOR Z;
8879    XMVECTOR InverseW;
8880    XMVECTOR Result;
8881
8882    Z = XMVectorSplatZ(V);
8883    Y = XMVectorSplatY(V);
8884    X = XMVectorSplatX(V);
8885
8886    Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
8887    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
8888    Result = XMVectorMultiplyAdd(X, M.r[0], Result);
8889
8890    InverseW = XMVectorSplatW(Result);
8891    InverseW = XMVectorReciprocal(InverseW);
8892
8893    Result = XMVectorMultiply(Result, InverseW);
8894
8895    return Result;
8896
8897#elif defined(_XM_SSE_INTRINSICS_)
8898    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
8899    vResult = _mm_mul_ps(vResult,M.r[0]);
8900    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
8901    vTemp = _mm_mul_ps(vTemp,M.r[1]);
8902    vResult = _mm_add_ps(vResult,vTemp);
8903    vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
8904    vTemp = _mm_mul_ps(vTemp,M.r[2]);
8905    vResult = _mm_add_ps(vResult,vTemp);
8906    vResult = _mm_add_ps(vResult,M.r[3]);
8907    vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
8908    vResult = _mm_div_ps(vResult,vTemp);
8909    return vResult;
8910#else // _XM_VMX128_INTRINSICS_
8911#endif // _XM_VMX128_INTRINSICS_
8912}
8913
8914//------------------------------------------------------------------------------
8915
8916XMINLINE XMFLOAT3* XMVector3TransformCoordStream
8917(
8918    XMFLOAT3*       pOutputStream,
8919    UINT            OutputStride,
8920    CONST XMFLOAT3* pInputStream,
8921    UINT            InputStride,
8922    UINT            VectorCount,
8923    CXMMATRIX     M
8924)
8925{
8926#if defined(_XM_NO_INTRINSICS_)
8927
8928    XMVECTOR V;
8929    XMVECTOR X;
8930    XMVECTOR Y;
8931    XMVECTOR Z;
8932    XMVECTOR InverseW;
8933    XMVECTOR Result;
8934    UINT     i;
8935    BYTE*    pInputVector = (BYTE*)pInputStream;
8936    BYTE*    pOutputVector = (BYTE*)pOutputStream;
8937
8938    XMASSERT(pOutputStream);
8939    XMASSERT(pInputStream);
8940
8941    for (i = 0; i < VectorCount; i++)
8942    {
8943        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
8944        Z = XMVectorSplatZ(V);
8945        Y = XMVectorSplatY(V);
8946        X = XMVectorSplatX(V);
8947//        Z = XMVectorReplicate(((XMFLOAT3*)pInputVector)->z);
8948//        Y = XMVectorReplicate(((XMFLOAT3*)pInputVector)->y);
8949//        X = XMVectorReplicate(((XMFLOAT3*)pInputVector)->x);
8950
8951        Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
8952        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
8953        Result = XMVectorMultiplyAdd(X, M.r[0], Result);
8954
8955        InverseW = XMVectorSplatW(Result);
8956        InverseW = XMVectorReciprocal(InverseW);
8957
8958        Result = XMVectorMultiply(Result, InverseW);
8959
8960        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
8961
8962        pInputVector += InputStride;
8963        pOutputVector += OutputStride;
8964    }
8965
8966    return pOutputStream;
8967
8968#elif defined(_XM_SSE_INTRINSICS_)
8969    XMASSERT(pOutputStream);
8970    XMASSERT(pInputStream);
8971
8972    UINT i;
8973    const BYTE *pInputVector = (BYTE*)pInputStream;
8974    BYTE *pOutputVector = (BYTE*)pOutputStream;
8975
8976    for (i = 0; i < VectorCount; i++)
8977    {
8978        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
8979        XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
8980        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
8981        vResult = _mm_mul_ps(vResult,M.r[2]);
8982        vResult = _mm_add_ps(vResult,M.r[3]);
8983        Y = _mm_mul_ps(Y,M.r[1]);
8984        vResult = _mm_add_ps(vResult,Y);
8985        X = _mm_mul_ps(X,M.r[0]);
8986        vResult = _mm_add_ps(vResult,X);
8987
8988        X = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
8989        vResult = _mm_div_ps(vResult,X);
8990    	_mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->x,vResult);
8991        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
8992    	_mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->y,vResult);
8993        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
8994	    _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->z,vResult);
8995        pInputVector += InputStride;
8996        pOutputVector += OutputStride;
8997    }
8998
8999    return pOutputStream;
9000#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
9001#endif // _XM_VMX128_INTRINSICS_
9002}
9003
9004//------------------------------------------------------------------------------
9005
9006XMFINLINE XMVECTOR XMVector3TransformNormal
9007(
9008    FXMVECTOR V,
9009    CXMMATRIX M
9010)
9011{
9012#if defined(_XM_NO_INTRINSICS_)
9013
9014    XMVECTOR X;
9015    XMVECTOR Y;
9016    XMVECTOR Z;
9017    XMVECTOR Result;
9018
9019    Z = XMVectorSplatZ(V);
9020    Y = XMVectorSplatY(V);
9021    X = XMVectorSplatX(V);
9022
9023    Result = XMVectorMultiply(Z, M.r[2]);
9024    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
9025    Result = XMVectorMultiplyAdd(X, M.r[0], Result);
9026
9027    return Result;
9028
9029#elif defined(_XM_SSE_INTRINSICS_)
9030    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
9031    vResult = _mm_mul_ps(vResult,M.r[0]);
9032    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
9033    vTemp = _mm_mul_ps(vTemp,M.r[1]);
9034    vResult = _mm_add_ps(vResult,vTemp);
9035    vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
9036    vTemp = _mm_mul_ps(vTemp,M.r[2]);
9037    vResult = _mm_add_ps(vResult,vTemp);
9038    return vResult;
9039#else // _XM_VMX128_INTRINSICS_
9040#endif // _XM_VMX128_INTRINSICS_
9041}
9042
9043//------------------------------------------------------------------------------
9044
9045XMINLINE XMFLOAT3* XMVector3TransformNormalStream
9046(
9047    XMFLOAT3*       pOutputStream,
9048    UINT            OutputStride,
9049    CONST XMFLOAT3* pInputStream,
9050    UINT            InputStride,
9051    UINT            VectorCount,
9052    CXMMATRIX     M
9053)
9054{
9055#if defined(_XM_NO_INTRINSICS_)
9056
9057    XMVECTOR V;
9058    XMVECTOR X;
9059    XMVECTOR Y;
9060    XMVECTOR Z;
9061    XMVECTOR Result;
9062    UINT     i;
9063    BYTE*    pInputVector = (BYTE*)pInputStream;
9064    BYTE*    pOutputVector = (BYTE*)pOutputStream;
9065
9066    XMASSERT(pOutputStream);
9067    XMASSERT(pInputStream);
9068
9069    for (i = 0; i < VectorCount; i++)
9070    {
9071        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
9072        Z = XMVectorSplatZ(V);
9073        Y = XMVectorSplatY(V);
9074        X = XMVectorSplatX(V);
9075//        Z = XMVectorReplicate(((XMFLOAT3*)pInputVector)->z);
9076//        Y = XMVectorReplicate(((XMFLOAT3*)pInputVector)->y);
9077//        X = XMVectorReplicate(((XMFLOAT3*)pInputVector)->x);
9078
9079        Result = XMVectorMultiply(Z, M.r[2]);
9080        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
9081        Result = XMVectorMultiplyAdd(X, M.r[0], Result);
9082
9083        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
9084
9085        pInputVector += InputStride;
9086        pOutputVector += OutputStride;
9087    }
9088
9089    return pOutputStream;
9090
9091#elif defined(_XM_SSE_INTRINSICS_)
9092    XMASSERT(pOutputStream);
9093    XMASSERT(pInputStream);
9094
9095    UINT i;
9096    const BYTE *pInputVector = (BYTE*)pInputStream;
9097    BYTE *pOutputVector = (BYTE*)pOutputStream;
9098
9099    for (i = 0; i < VectorCount; i++)
9100    {
9101        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
9102        XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
9103        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
9104        vResult = _mm_mul_ps(vResult,M.r[2]);
9105        Y = _mm_mul_ps(Y,M.r[1]);
9106        vResult = _mm_add_ps(vResult,Y);
9107        X = _mm_mul_ps(X,M.r[0]);
9108        vResult = _mm_add_ps(vResult,X);
9109    	_mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->x,vResult);
9110        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
9111    	_mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->y,vResult);
9112        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
9113	    _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->z,vResult);
9114        pInputVector += InputStride;
9115        pOutputVector += OutputStride;
9116    }
9117
9118    return pOutputStream;
9119#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
9120#endif // _XM_VMX128_INTRINSICS_
9121}
9122
9123//------------------------------------------------------------------------------
9124
9125XMINLINE XMVECTOR XMVector3Project
9126(
9127    FXMVECTOR V,
9128    FLOAT    ViewportX,
9129    FLOAT    ViewportY,
9130    FLOAT    ViewportWidth,
9131    FLOAT    ViewportHeight,
9132    FLOAT    ViewportMinZ,
9133    FLOAT    ViewportMaxZ,
9134    CXMMATRIX Projection,
9135    CXMMATRIX View,
9136    CXMMATRIX World
9137)
9138{
9139#if defined(_XM_NO_INTRINSICS_)
9140
9141    XMMATRIX Transform;
9142    XMVECTOR Scale;
9143    XMVECTOR Offset;
9144    XMVECTOR Result;
9145    FLOAT    HalfViewportWidth = ViewportWidth * 0.5f;
9146    FLOAT    HalfViewportHeight = ViewportHeight * 0.5f;
9147
9148    Scale = XMVectorSet(HalfViewportWidth,
9149                        -HalfViewportHeight,
9150                        ViewportMaxZ - ViewportMinZ,
9151                        0.0f);
9152
9153    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
9154                        ViewportY + HalfViewportHeight,
9155                        ViewportMinZ,
9156                        0.0f);
9157
9158    Transform = XMMatrixMultiply(World, View);
9159    Transform = XMMatrixMultiply(Transform, Projection);
9160
9161    Result = XMVector3TransformCoord(V, Transform);
9162
9163    Result = XMVectorMultiplyAdd(Result, Scale, Offset);
9164
9165    return Result;
9166
9167#elif defined(_XM_SSE_INTRINSICS_)
9168    XMMATRIX Transform;
9169    XMVECTOR Scale;
9170    XMVECTOR Offset;
9171    XMVECTOR Result;
9172    FLOAT    HalfViewportWidth = ViewportWidth * 0.5f;
9173    FLOAT    HalfViewportHeight = ViewportHeight * 0.5f;
9174
9175    Scale = XMVectorSet(HalfViewportWidth,
9176                        -HalfViewportHeight,
9177                        ViewportMaxZ - ViewportMinZ,
9178                        0.0f);
9179
9180    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
9181                        ViewportY + HalfViewportHeight,
9182                        ViewportMinZ,
9183                        0.0f);
9184    Transform = XMMatrixMultiply(World, View);
9185    Transform = XMMatrixMultiply(Transform, Projection);
9186    Result = XMVector3TransformCoord(V, Transform);
9187    Result = _mm_mul_ps(Result,Scale);
9188    Result = _mm_add_ps(Result,Offset);
9189    return Result;
9190#else // _XM_VMX128_INTRINSICS_
9191#endif // _XM_VMX128_INTRINSICS_
9192}
9193
9194//------------------------------------------------------------------------------
9195
9196XMINLINE XMFLOAT3* XMVector3ProjectStream
9197(
9198    XMFLOAT3*       pOutputStream,
9199    UINT            OutputStride,
9200    CONST XMFLOAT3* pInputStream,
9201    UINT            InputStride,
9202    UINT            VectorCount,
9203    FLOAT           ViewportX,
9204    FLOAT           ViewportY,
9205    FLOAT           ViewportWidth,
9206    FLOAT           ViewportHeight,
9207    FLOAT           ViewportMinZ,
9208    FLOAT           ViewportMaxZ,
9209    CXMMATRIX     Projection,
9210    CXMMATRIX     View,
9211    CXMMATRIX     World
9212)
9213{
9214#if defined(_XM_NO_INTRINSICS_)
9215
9216    XMMATRIX Transform;
9217    XMVECTOR V;
9218    XMVECTOR Scale;
9219    XMVECTOR Offset;
9220    XMVECTOR Result;
9221    UINT     i;
9222    FLOAT    HalfViewportWidth = ViewportWidth * 0.5f;
9223    FLOAT    HalfViewportHeight = ViewportHeight * 0.5f;
9224    BYTE*    pInputVector = (BYTE*)pInputStream;
9225    BYTE*    pOutputVector = (BYTE*)pOutputStream;
9226
9227    XMASSERT(pOutputStream);
9228    XMASSERT(pInputStream);
9229
9230    Scale = XMVectorSet(HalfViewportWidth,
9231                        -HalfViewportHeight,
9232                        ViewportMaxZ - ViewportMinZ,
9233                        1.0f);
9234
9235    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
9236                        ViewportY + HalfViewportHeight,
9237                        ViewportMinZ,
9238                        0.0f);
9239
9240    Transform = XMMatrixMultiply(World, View);
9241    Transform = XMMatrixMultiply(Transform, Projection);
9242
9243    for (i = 0; i < VectorCount; i++)
9244    {
9245        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
9246
9247        Result = XMVector3TransformCoord(V, Transform);
9248
9249        Result = XMVectorMultiplyAdd(Result, Scale, Offset);
9250
9251        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
9252
9253        pInputVector += InputStride;
9254        pOutputVector += OutputStride;
9255    }
9256
9257    return pOutputStream;
9258
9259#elif defined(_XM_SSE_INTRINSICS_)
9260	XMASSERT(pOutputStream);
9261    XMASSERT(pInputStream);
9262    XMMATRIX Transform;
9263    XMVECTOR V;
9264    XMVECTOR Scale;
9265    XMVECTOR Offset;
9266    XMVECTOR Result;
9267    UINT     i;
9268    FLOAT    HalfViewportWidth = ViewportWidth * 0.5f;
9269    FLOAT    HalfViewportHeight = ViewportHeight * 0.5f;
9270    BYTE*    pInputVector = (BYTE*)pInputStream;
9271    BYTE*    pOutputVector = (BYTE*)pOutputStream;
9272
9273    Scale = XMVectorSet(HalfViewportWidth,
9274                        -HalfViewportHeight,
9275                        ViewportMaxZ - ViewportMinZ,
9276                        1.0f);
9277
9278    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
9279                        ViewportY + HalfViewportHeight,
9280                        ViewportMinZ,
9281                        0.0f);
9282
9283    Transform = XMMatrixMultiply(World, View);
9284    Transform = XMMatrixMultiply(Transform, Projection);
9285
9286    for (i = 0; i < VectorCount; i++)
9287    {
9288        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
9289
9290        Result = XMVector3TransformCoord(V, Transform);
9291
9292        Result = _mm_mul_ps(Result,Scale);
9293        Result = _mm_add_ps(Result,Offset);
9294        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
9295        pInputVector += InputStride;
9296        pOutputVector += OutputStride;
9297    }
9298    return pOutputStream;
9299
9300#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
9301#endif // _XM_VMX128_INTRINSICS_
9302}
9303
9304//------------------------------------------------------------------------------
9305
9306XMFINLINE XMVECTOR XMVector3Unproject
9307(
9308    FXMVECTOR V,
9309    FLOAT    ViewportX,
9310    FLOAT    ViewportY,
9311    FLOAT    ViewportWidth,
9312    FLOAT    ViewportHeight,
9313    FLOAT    ViewportMinZ,
9314    FLOAT    ViewportMaxZ,
9315    CXMMATRIX Projection,
9316    CXMMATRIX View,
9317    CXMMATRIX World
9318)
9319{
9320#if defined(_XM_NO_INTRINSICS_)
9321
9322    XMMATRIX        Transform;
9323    XMVECTOR        Scale;
9324    XMVECTOR        Offset;
9325    XMVECTOR        Determinant;
9326    XMVECTOR        Result;
9327    CONST XMVECTOR  D = XMVectorSet(-1.0f, 1.0f, 0.0f, 0.0f);
9328
9329    Scale = XMVectorSet(ViewportWidth * 0.5f,
9330                        -ViewportHeight * 0.5f,
9331                        ViewportMaxZ - ViewportMinZ,
9332                        1.0f);
9333    Scale = XMVectorReciprocal(Scale);
9334
9335    Offset = XMVectorSet(-ViewportX,
9336                        -ViewportY,
9337                        -ViewportMinZ,
9338                        0.0f);
9339    Offset = XMVectorMultiplyAdd(Scale, Offset, D);
9340
9341    Transform = XMMatrixMultiply(World, View);
9342    Transform = XMMatrixMultiply(Transform, Projection);
9343    Transform = XMMatrixInverse(&Determinant, Transform);
9344
9345    Result = XMVectorMultiplyAdd(V, Scale, Offset);
9346
9347    Result = XMVector3TransformCoord(Result, Transform);
9348
9349    return Result;
9350
9351#elif defined(_XM_SSE_INTRINSICS_)
9352    XMMATRIX        Transform;
9353    XMVECTOR        Scale;
9354    XMVECTOR        Offset;
9355    XMVECTOR        Determinant;
9356    XMVECTOR        Result;
9357    CONST XMVECTORF32  D = {-1.0f, 1.0f, 0.0f, 0.0f};
9358
9359    Scale = XMVectorSet(ViewportWidth * 0.5f,
9360                        -ViewportHeight * 0.5f,
9361                        ViewportMaxZ - ViewportMinZ,
9362                        1.0f);
9363    Scale = XMVectorReciprocal(Scale);
9364
9365    Offset = XMVectorSet(-ViewportX,
9366                        -ViewportY,
9367                        -ViewportMinZ,
9368                        0.0f);
9369    Offset = _mm_mul_ps(Offset,Scale);
9370    Offset = _mm_add_ps(Offset,D);
9371
9372    Transform = XMMatrixMultiply(World, View);
9373    Transform = XMMatrixMultiply(Transform, Projection);
9374    Transform = XMMatrixInverse(&Determinant, Transform);
9375
9376    Result = _mm_mul_ps(V,Scale);
9377    Result = _mm_add_ps(Result,Offset);
9378
9379    Result = XMVector3TransformCoord(Result, Transform);
9380
9381    return Result;
9382#else // _XM_VMX128_INTRINSICS_
9383#endif // _XM_VMX128_INTRINSICS_
9384}
9385
9386//------------------------------------------------------------------------------
9387
9388XMINLINE XMFLOAT3* XMVector3UnprojectStream
9389(
9390    XMFLOAT3*       pOutputStream,
9391    UINT            OutputStride,
9392    CONST XMFLOAT3* pInputStream,
9393    UINT            InputStride,
9394    UINT            VectorCount,
9395    FLOAT           ViewportX,
9396    FLOAT           ViewportY,
9397    FLOAT           ViewportWidth,
9398    FLOAT           ViewportHeight,
9399    FLOAT           ViewportMinZ,
9400    FLOAT           ViewportMaxZ,
9401    CXMMATRIX     Projection,
9402    CXMMATRIX     View,
9403    CXMMATRIX     World)
9404{
9405#if defined(_XM_NO_INTRINSICS_)
9406
9407    XMMATRIX        Transform;
9408    XMVECTOR        Scale;
9409    XMVECTOR        Offset;
9410    XMVECTOR        V;
9411    XMVECTOR        Determinant;
9412    XMVECTOR        Result;
9413    UINT            i;
9414    BYTE*           pInputVector = (BYTE*)pInputStream;
9415    BYTE*           pOutputVector = (BYTE*)pOutputStream;
9416    CONST XMVECTOR  D = XMVectorSet(-1.0f, 1.0f, 0.0f, 0.0f);
9417
9418    XMASSERT(pOutputStream);
9419    XMASSERT(pInputStream);
9420
9421    Scale = XMVectorSet(ViewportWidth * 0.5f,
9422                        -ViewportHeight * 0.5f,
9423                        ViewportMaxZ - ViewportMinZ,
9424                        1.0f);
9425    Scale = XMVectorReciprocal(Scale);
9426
9427    Offset = XMVectorSet(-ViewportX,
9428                        -ViewportY,
9429                        -ViewportMinZ,
9430                        0.0f);
9431    Offset = XMVectorMultiplyAdd(Scale, Offset, D);
9432
9433    Transform = XMMatrixMultiply(World, View);
9434    Transform = XMMatrixMultiply(Transform, Projection);
9435    Transform = XMMatrixInverse(&Determinant, Transform);
9436
9437    for (i = 0; i < VectorCount; i++)
9438    {
9439        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
9440
9441        Result = XMVectorMultiplyAdd(V, Scale, Offset);
9442
9443        Result = XMVector3TransformCoord(Result, Transform);
9444
9445        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
9446
9447        pInputVector += InputStride;
9448        pOutputVector += OutputStride;
9449    }
9450
9451    return pOutputStream;
9452
9453#elif defined(_XM_SSE_INTRINSICS_)
9454    XMASSERT(pOutputStream);
9455    XMASSERT(pInputStream);
9456    XMMATRIX        Transform;
9457    XMVECTOR        Scale;
9458    XMVECTOR        Offset;
9459    XMVECTOR        V;
9460    XMVECTOR        Determinant;
9461    XMVECTOR        Result;
9462    UINT            i;
9463    BYTE*           pInputVector = (BYTE*)pInputStream;
9464    BYTE*           pOutputVector = (BYTE*)pOutputStream;
9465    CONST XMVECTORF32  D = {-1.0f, 1.0f, 0.0f, 0.0f};
9466
9467    Scale = XMVectorSet(ViewportWidth * 0.5f,
9468                        -ViewportHeight * 0.5f,
9469                        ViewportMaxZ - ViewportMinZ,
9470                        1.0f);
9471    Scale = XMVectorReciprocal(Scale);
9472
9473    Offset = XMVectorSet(-ViewportX,
9474                        -ViewportY,
9475                        -ViewportMinZ,
9476                        0.0f);
9477    Offset = _mm_mul_ps(Offset,Scale);
9478    Offset = _mm_add_ps(Offset,D);
9479
9480    Transform = XMMatrixMultiply(World, View);
9481    Transform = XMMatrixMultiply(Transform, Projection);
9482    Transform = XMMatrixInverse(&Determinant, Transform);
9483
9484    for (i = 0; i < VectorCount; i++)
9485    {
9486        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
9487
9488        Result = XMVectorMultiplyAdd(V, Scale, Offset);
9489
9490        Result = XMVector3TransformCoord(Result, Transform);
9491
9492        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
9493
9494        pInputVector += InputStride;
9495        pOutputVector += OutputStride;
9496    }
9497
9498    return pOutputStream;
9499#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
9500#endif // _XM_VMX128_INTRINSICS_
9501}
9502
9503/****************************************************************************
9504 *
9505 * 4D Vector
9506 *
9507 ****************************************************************************/
9508
9509//------------------------------------------------------------------------------
9510// Comparison operations
9511//------------------------------------------------------------------------------
9512
9513//------------------------------------------------------------------------------
9514
9515XMFINLINE BOOL XMVector4Equal
9516(
9517    FXMVECTOR V1,
9518    FXMVECTOR V2
9519)
9520{
9521#if defined(_XM_NO_INTRINSICS_)
9522    return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1]) && (V1.vector4_f32[2] == V2.vector4_f32[2]) && (V1.vector4_f32[3] == V2.vector4_f32[3])) != 0);
9523#elif defined(_XM_SSE_INTRINSICS_)
9524    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
9525    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
9526#else
9527    return XMComparisonAllTrue(XMVector4EqualR(V1, V2));
9528#endif
9529}
9530
9531//------------------------------------------------------------------------------
9532
9533XMFINLINE UINT XMVector4EqualR
9534(
9535    FXMVECTOR V1,
9536    FXMVECTOR V2
9537)
9538{
9539#if defined(_XM_NO_INTRINSICS_)
9540
9541    UINT CR = 0;
9542
9543    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
9544        (V1.vector4_f32[1] == V2.vector4_f32[1]) &&
9545        (V1.vector4_f32[2] == V2.vector4_f32[2]) &&
9546        (V1.vector4_f32[3] == V2.vector4_f32[3]))
9547    {
9548        CR = XM_CRMASK_CR6TRUE;
9549    }
9550    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
9551        (V1.vector4_f32[1] != V2.vector4_f32[1]) &&
9552        (V1.vector4_f32[2] != V2.vector4_f32[2]) &&
9553        (V1.vector4_f32[3] != V2.vector4_f32[3]))
9554    {
9555        CR = XM_CRMASK_CR6FALSE;
9556    }
9557    return CR;
9558
9559#elif defined(_XM_SSE_INTRINSICS_)
9560    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
9561    int iTest = _mm_movemask_ps(vTemp);
9562    UINT CR = 0;
9563    if (iTest==0xf)     // All equal?
9564    {
9565        CR = XM_CRMASK_CR6TRUE;
9566    }
9567    else if (iTest==0)  // All not equal?
9568    {
9569        CR = XM_CRMASK_CR6FALSE;
9570    }
9571	return CR;
9572#else // _XM_VMX128_INTRINSICS_
9573#endif // _XM_VMX128_INTRINSICS_
9574}
9575
9576//------------------------------------------------------------------------------
9577
9578XMFINLINE BOOL XMVector4EqualInt
9579(
9580    FXMVECTOR V1,
9581    FXMVECTOR V2
9582)
9583{
9584#if defined(_XM_NO_INTRINSICS_)
9585    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1]) && (V1.vector4_u32[2] == V2.vector4_u32[2]) && (V1.vector4_u32[3] == V2.vector4_u32[3])) != 0);
9586#elif defined(_XM_SSE_INTRINSICS_)
9587    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
9588    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])==0xf) != 0);
9589#else
9590    return XMComparisonAllTrue(XMVector4EqualIntR(V1, V2));
9591#endif
9592}
9593
9594//------------------------------------------------------------------------------
9595
9596XMFINLINE UINT XMVector4EqualIntR
9597(
9598    FXMVECTOR V1,
9599    FXMVECTOR V2
9600)
9601{
9602#if defined(_XM_NO_INTRINSICS_)
9603    UINT CR = 0;
9604    if (V1.vector4_u32[0] == V2.vector4_u32[0] &&
9605        V1.vector4_u32[1] == V2.vector4_u32[1] &&
9606        V1.vector4_u32[2] == V2.vector4_u32[2] &&
9607        V1.vector4_u32[3] == V2.vector4_u32[3])
9608    {
9609        CR = XM_CRMASK_CR6TRUE;
9610    }
9611    else if (V1.vector4_u32[0] != V2.vector4_u32[0] &&
9612        V1.vector4_u32[1] != V2.vector4_u32[1] &&
9613        V1.vector4_u32[2] != V2.vector4_u32[2] &&
9614        V1.vector4_u32[3] != V2.vector4_u32[3])
9615    {
9616        CR = XM_CRMASK_CR6FALSE;
9617    }
9618    return CR;
9619
9620#elif defined(_XM_SSE_INTRINSICS_)
9621    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
9622    int iTest = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0]);
9623    UINT CR = 0;
9624    if (iTest==0xf)     // All equal?
9625    {
9626        CR = XM_CRMASK_CR6TRUE;
9627    }
9628    else if (iTest==0)  // All not equal?
9629    {
9630        CR = XM_CRMASK_CR6FALSE;
9631    }
9632	return CR;
9633#else // _XM_VMX128_INTRINSICS_
9634#endif // _XM_VMX128_INTRINSICS_
9635}
9636
9637XMFINLINE BOOL XMVector4NearEqual
9638(
9639    FXMVECTOR V1,
9640    FXMVECTOR V2,
9641    FXMVECTOR Epsilon
9642)
9643{
9644#if defined(_XM_NO_INTRINSICS_)
9645    FLOAT dx, dy, dz, dw;
9646
9647    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
9648    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
9649    dz = fabsf(V1.vector4_f32[2]-V2.vector4_f32[2]);
9650    dw = fabsf(V1.vector4_f32[3]-V2.vector4_f32[3]);
9651    return (((dx <= Epsilon.vector4_f32[0]) &&
9652            (dy <= Epsilon.vector4_f32[1]) &&
9653            (dz <= Epsilon.vector4_f32[2]) &&
9654            (dw <= Epsilon.vector4_f32[3])) != 0);
9655#elif defined(_XM_SSE_INTRINSICS_)
9656    // Get the difference
9657    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
9658    // Get the absolute value of the difference
9659    XMVECTOR vTemp = _mm_setzero_ps();
9660    vTemp = _mm_sub_ps(vTemp,vDelta);
9661    vTemp = _mm_max_ps(vTemp,vDelta);
9662    vTemp = _mm_cmple_ps(vTemp,Epsilon);
9663    return ((_mm_movemask_ps(vTemp)==0xf) != 0);
9664#else // _XM_VMX128_INTRINSICS_
9665#endif // _XM_VMX128_INTRINSICS_
9666}
9667
9668//------------------------------------------------------------------------------
9669
9670XMFINLINE BOOL XMVector4NotEqual
9671(
9672    FXMVECTOR V1,
9673    FXMVECTOR V2
9674)
9675{
9676#if defined(_XM_NO_INTRINSICS_)
9677    return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1]) || (V1.vector4_f32[2] != V2.vector4_f32[2]) || (V1.vector4_f32[3] != V2.vector4_f32[3])) != 0);
9678#elif defined(_XM_SSE_INTRINSICS_)
9679    XMVECTOR vTemp = _mm_cmpneq_ps(V1,V2);
9680    return ((_mm_movemask_ps(vTemp)) != 0);
9681#else
9682    return XMComparisonAnyFalse(XMVector4EqualR(V1, V2));
9683#endif
9684}
9685
9686//------------------------------------------------------------------------------
9687
9688XMFINLINE BOOL XMVector4NotEqualInt
9689(
9690    FXMVECTOR V1,
9691    FXMVECTOR V2
9692)
9693{
9694#if defined(_XM_NO_INTRINSICS_)
9695    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1]) || (V1.vector4_u32[2] != V2.vector4_u32[2]) || (V1.vector4_u32[3] != V2.vector4_u32[3])) != 0);
9696#elif defined(_XM_SSE_INTRINSICS_)
9697    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
9698    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])!=0xF) != 0);
9699#else
9700    return XMComparisonAnyFalse(XMVector4EqualIntR(V1, V2));
9701#endif
9702}
9703
9704//------------------------------------------------------------------------------
9705
9706XMFINLINE BOOL XMVector4Greater
9707(
9708    FXMVECTOR V1,
9709    FXMVECTOR V2
9710)
9711{
9712#if defined(_XM_NO_INTRINSICS_)
9713    return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1]) && (V1.vector4_f32[2] > V2.vector4_f32[2]) && (V1.vector4_f32[3] > V2.vector4_f32[3])) != 0);
9714#elif defined(_XM_SSE_INTRINSICS_)
9715    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
9716    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
9717#else
9718    return XMComparisonAllTrue(XMVector4GreaterR(V1, V2));
9719#endif
9720}
9721
9722//------------------------------------------------------------------------------
9723
9724XMFINLINE UINT XMVector4GreaterR
9725(
9726    FXMVECTOR V1,
9727    FXMVECTOR V2
9728)
9729{
9730#if defined(_XM_NO_INTRINSICS_)
9731    UINT CR = 0;
9732    if (V1.vector4_f32[0] > V2.vector4_f32[0] &&
9733        V1.vector4_f32[1] > V2.vector4_f32[1] &&
9734        V1.vector4_f32[2] > V2.vector4_f32[2] &&
9735        V1.vector4_f32[3] > V2.vector4_f32[3])
9736    {
9737        CR = XM_CRMASK_CR6TRUE;
9738    }
9739    else if (V1.vector4_f32[0] <= V2.vector4_f32[0] &&
9740        V1.vector4_f32[1] <= V2.vector4_f32[1] &&
9741        V1.vector4_f32[2] <= V2.vector4_f32[2] &&
9742        V1.vector4_f32[3] <= V2.vector4_f32[3])
9743    {
9744        CR = XM_CRMASK_CR6FALSE;
9745    }
9746    return CR;
9747
9748#elif defined(_XM_SSE_INTRINSICS_)
9749    UINT CR = 0;
9750	XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
9751    int iTest = _mm_movemask_ps(vTemp);
9752    if (iTest==0xf) {
9753        CR = XM_CRMASK_CR6TRUE;
9754    }
9755    else if (!iTest)
9756    {
9757        CR = XM_CRMASK_CR6FALSE;
9758    }
9759    return CR;
9760#else // _XM_VMX128_INTRINSICS_
9761#endif // _XM_VMX128_INTRINSICS_
9762}
9763
9764//------------------------------------------------------------------------------
9765
9766XMFINLINE BOOL XMVector4GreaterOrEqual
9767(
9768    FXMVECTOR V1,
9769    FXMVECTOR V2
9770)
9771{
9772#if defined(_XM_NO_INTRINSICS_)
9773    return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1]) && (V1.vector4_f32[2] >= V2.vector4_f32[2]) && (V1.vector4_f32[3] >= V2.vector4_f32[3])) != 0);
9774#elif defined(_XM_SSE_INTRINSICS_)
9775    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
9776    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
9777#else
9778    return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V1, V2));
9779#endif
9780}
9781
9782//------------------------------------------------------------------------------
9783
9784XMFINLINE UINT XMVector4GreaterOrEqualR
9785(
9786    FXMVECTOR V1,
9787    FXMVECTOR V2
9788)
9789{
9790#if defined(_XM_NO_INTRINSICS_)
9791    UINT CR = 0;
9792    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
9793        (V1.vector4_f32[1] >= V2.vector4_f32[1]) &&
9794        (V1.vector4_f32[2] >= V2.vector4_f32[2]) &&
9795        (V1.vector4_f32[3] >= V2.vector4_f32[3]))
9796    {
9797        CR = XM_CRMASK_CR6TRUE;
9798    }
9799    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
9800        (V1.vector4_f32[1] < V2.vector4_f32[1]) &&
9801        (V1.vector4_f32[2] < V2.vector4_f32[2]) &&
9802        (V1.vector4_f32[3] < V2.vector4_f32[3]))
9803    {
9804        CR = XM_CRMASK_CR6FALSE;
9805    }
9806    return CR;
9807
9808#elif defined(_XM_SSE_INTRINSICS_)
9809    UINT CR = 0;
9810	XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
9811	int iTest = _mm_movemask_ps(vTemp);
9812    if (iTest==0x0f)
9813    {
9814        CR = XM_CRMASK_CR6TRUE;
9815    }
9816    else if (!iTest)
9817    {
9818        CR = XM_CRMASK_CR6FALSE;
9819    }
9820    return CR;
9821#else // _XM_VMX128_INTRINSICS_
9822#endif // _XM_VMX128_INTRINSICS_
9823}
9824
9825//------------------------------------------------------------------------------
9826
9827XMFINLINE BOOL XMVector4Less
9828(
9829    FXMVECTOR V1,
9830    FXMVECTOR V2
9831)
9832{
9833#if defined(_XM_NO_INTRINSICS_)
9834    return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1]) && (V1.vector4_f32[2] < V2.vector4_f32[2]) && (V1.vector4_f32[3] < V2.vector4_f32[3])) != 0);
9835#elif defined(_XM_SSE_INTRINSICS_)
9836    XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
9837    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
9838#else
9839    return XMComparisonAllTrue(XMVector4GreaterR(V2, V1));
9840#endif
9841}
9842
9843//------------------------------------------------------------------------------
9844
9845XMFINLINE BOOL XMVector4LessOrEqual
9846(
9847    FXMVECTOR V1,
9848    FXMVECTOR V2
9849)
9850{
9851#if defined(_XM_NO_INTRINSICS_)
9852    return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1]) && (V1.vector4_f32[2] <= V2.vector4_f32[2]) && (V1.vector4_f32[3] <= V2.vector4_f32[3])) != 0);
9853#elif defined(_XM_SSE_INTRINSICS_)
9854    XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
9855    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
9856#else
9857    return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V2, V1));
9858#endif
9859}
9860
9861//------------------------------------------------------------------------------
9862
9863XMFINLINE BOOL XMVector4InBounds
9864(
9865    FXMVECTOR V,
9866    FXMVECTOR Bounds
9867)
9868{
9869#if defined(_XM_NO_INTRINSICS_)
9870    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
9871        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
9872        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) &&
9873        (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3])) != 0);
9874#elif defined(_XM_SSE_INTRINSICS_)
9875    // Test if less than or equal
9876    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
9877    // Negate the bounds
9878    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
9879    // Test if greater or equal (Reversed)
9880    vTemp2 = _mm_cmple_ps(vTemp2,V);
9881    // Blend answers
9882    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
9883    // All in bounds?
9884    return ((_mm_movemask_ps(vTemp1)==0x0f) != 0);
9885#else
9886    return XMComparisonAllInBounds(XMVector4InBoundsR(V, Bounds));
9887#endif
9888}
9889
9890//------------------------------------------------------------------------------
9891
9892XMFINLINE UINT XMVector4InBoundsR
9893(
9894    FXMVECTOR V,
9895    FXMVECTOR Bounds
9896)
9897{
9898#if defined(_XM_NO_INTRINSICS_)
9899
9900    UINT CR = 0;
9901    if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
9902        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
9903        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) &&
9904        (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]))
9905    {
9906        CR = XM_CRMASK_CR6BOUNDS;
9907    }
9908    return CR;
9909
9910#elif defined(_XM_SSE_INTRINSICS_)
9911    // Test if less than or equal
9912    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
9913    // Negate the bounds
9914    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
9915    // Test if greater or equal (Reversed)
9916    vTemp2 = _mm_cmple_ps(vTemp2,V);
9917    // Blend answers
9918    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
9919    // All in bounds?
9920    return (_mm_movemask_ps(vTemp1)==0x0f) ? XM_CRMASK_CR6BOUNDS : 0;
9921#else // _XM_VMX128_INTRINSICS_
9922#endif // _XM_VMX128_INTRINSICS_
9923}
9924
9925//------------------------------------------------------------------------------
9926
9927XMFINLINE BOOL XMVector4IsNaN
9928(
9929    FXMVECTOR V
9930)
9931{
9932#if defined(_XM_NO_INTRINSICS_)
9933    return (XMISNAN(V.vector4_f32[0]) ||
9934            XMISNAN(V.vector4_f32[1]) ||
9935            XMISNAN(V.vector4_f32[2]) ||
9936            XMISNAN(V.vector4_f32[3]));
9937#elif defined(_XM_SSE_INTRINSICS_)
9938    // Test against itself. NaN is always not equal
9939    XMVECTOR vTempNan = _mm_cmpneq_ps(V,V);
9940    // If any are NaN, the mask is non-zero
9941    return (_mm_movemask_ps(vTempNan)!=0);
9942#else // _XM_VMX128_INTRINSICS_
9943#endif // _XM_VMX128_INTRINSICS_
9944}
9945
9946//------------------------------------------------------------------------------
9947
9948XMFINLINE BOOL XMVector4IsInfinite
9949(
9950    FXMVECTOR V
9951)
9952{
9953#if defined(_XM_NO_INTRINSICS_)
9954
9955    return (XMISINF(V.vector4_f32[0]) ||
9956            XMISINF(V.vector4_f32[1]) ||
9957            XMISINF(V.vector4_f32[2]) ||
9958            XMISINF(V.vector4_f32[3]));
9959
9960#elif defined(_XM_SSE_INTRINSICS_)
9961    // Mask off the sign bit
9962    XMVECTOR vTemp = _mm_and_ps(V,g_XMAbsMask);
9963    // Compare to infinity
9964    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
9965    // If any are infinity, the signs are true.
9966    return (_mm_movemask_ps(vTemp) != 0);
9967#else // _XM_VMX128_INTRINSICS_
9968#endif // _XM_VMX128_INTRINSICS_
9969}
9970
9971//------------------------------------------------------------------------------
9972// Computation operations
9973//------------------------------------------------------------------------------
9974
9975//------------------------------------------------------------------------------
9976
9977XMFINLINE XMVECTOR XMVector4Dot
9978(
9979    FXMVECTOR V1,
9980    FXMVECTOR V2
9981)
9982{
9983#if defined(_XM_NO_INTRINSICS_)
9984
9985    XMVECTOR Result;
9986
9987    Result.vector4_f32[0] =
9988    Result.vector4_f32[1] =
9989    Result.vector4_f32[2] =
9990    Result.vector4_f32[3] = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1] + V1.vector4_f32[2] * V2.vector4_f32[2] + V1.vector4_f32[3] * V2.vector4_f32[3];
9991
9992    return Result;
9993
9994#elif defined(_XM_SSE_INTRINSICS_)
9995    XMVECTOR vTemp2 = V2;
9996    XMVECTOR vTemp = _mm_mul_ps(V1,vTemp2);
9997    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
9998    vTemp2 = _mm_add_ps(vTemp2,vTemp);          // Add Z = X+Z; W = Y+W;
9999    vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0));  // Copy W to the Z position
10000    vTemp = _mm_add_ps(vTemp,vTemp2);           // Add Z and W together
10001    return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2));    // Splat Z and return
10002#else // _XM_VMX128_INTRINSICS_
10003#endif // _XM_VMX128_INTRINSICS_
10004}
10005
10006//------------------------------------------------------------------------------
10007
10008XMFINLINE XMVECTOR XMVector4Cross
10009(
10010    FXMVECTOR V1,
10011    FXMVECTOR V2,
10012    FXMVECTOR V3
10013)
10014{
10015#if defined(_XM_NO_INTRINSICS_)
10016    XMVECTOR Result;
10017
10018    Result.vector4_f32[0] = (((V2.vector4_f32[2]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[2]))*V1.vector4_f32[1])-(((V2.vector4_f32[1]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[1]))*V1.vector4_f32[2])+(((V2.vector4_f32[1]*V3.vector4_f32[2])-(V2.vector4_f32[2]*V3.vector4_f32[1]))*V1.vector4_f32[3]);
10019    Result.vector4_f32[1] = (((V2.vector4_f32[3]*V3.vector4_f32[2])-(V2.vector4_f32[2]*V3.vector4_f32[3]))*V1.vector4_f32[0])-(((V2.vector4_f32[3]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[3]))*V1.vector4_f32[2])+(((V2.vector4_f32[2]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[2]))*V1.vector4_f32[3]);
10020    Result.vector4_f32[2] = (((V2.vector4_f32[1]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[1]))*V1.vector4_f32[0])-(((V2.vector4_f32[0]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[0]))*V1.vector4_f32[1])+(((V2.vector4_f32[0]*V3.vector4_f32[1])-(V2.vector4_f32[1]*V3.vector4_f32[0]))*V1.vector4_f32[3]);
10021    Result.vector4_f32[3] = (((V2.vector4_f32[2]*V3.vector4_f32[1])-(V2.vector4_f32[1]*V3.vector4_f32[2]))*V1.vector4_f32[0])-(((V2.vector4_f32[2]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[2]))*V1.vector4_f32[1])+(((V2.vector4_f32[1]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[1]))*V1.vector4_f32[2]);
10022    return Result;
10023
10024#elif defined(_XM_SSE_INTRINSICS_)
10025    // V2zwyz * V3wzwy
10026    XMVECTOR vResult = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(2,1,3,2));
10027    XMVECTOR vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(1,3,2,3));
10028    vResult = _mm_mul_ps(vResult,vTemp3);
10029    // - V2wzwy * V3zwyz
10030    XMVECTOR vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(1,3,2,3));
10031    vTemp3 = _mm_shuffle_ps(vTemp3,vTemp3,_MM_SHUFFLE(1,3,0,1));
10032    vTemp2 = _mm_mul_ps(vTemp2,vTemp3);
10033    vResult = _mm_sub_ps(vResult,vTemp2);
10034    // term1 * V1yxxx
10035    XMVECTOR vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(0,0,0,1));
10036    vResult = _mm_mul_ps(vResult,vTemp1);
10037
10038    // V2ywxz * V3wxwx
10039    vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(2,0,3,1));
10040    vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(0,3,0,3));
10041    vTemp3 = _mm_mul_ps(vTemp3,vTemp2);
10042    // - V2wxwx * V3ywxz
10043    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(2,1,2,1));
10044    vTemp1 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(2,0,3,1));
10045    vTemp2 = _mm_mul_ps(vTemp2,vTemp1);
10046    vTemp3 = _mm_sub_ps(vTemp3,vTemp2);
10047    // vResult - temp * V1zzyy
10048    vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(1,1,2,2));
10049    vTemp1 = _mm_mul_ps(vTemp1,vTemp3);
10050    vResult = _mm_sub_ps(vResult,vTemp1);
10051
10052    // V2yzxy * V3zxyx
10053    vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(1,0,2,1));
10054    vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(0,1,0,2));
10055    vTemp3 = _mm_mul_ps(vTemp3,vTemp2);
10056    // - V2zxyx * V3yzxy
10057    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(2,0,2,1));
10058    vTemp1 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(1,0,2,1));
10059    vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
10060    vTemp3 = _mm_sub_ps(vTemp3,vTemp1);
10061    // vResult + term * V1wwwz
10062    vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(2,3,3,3));
10063    vTemp3 = _mm_mul_ps(vTemp3,vTemp1);
10064    vResult = _mm_add_ps(vResult,vTemp3);
10065    return vResult;
10066#else // _XM_VMX128_INTRINSICS_
10067#endif // _XM_VMX128_INTRINSICS_
10068}
10069
10070//------------------------------------------------------------------------------
10071
10072XMFINLINE XMVECTOR XMVector4LengthSq
10073(
10074    FXMVECTOR V
10075)
10076{
10077    return XMVector4Dot(V, V);
10078}
10079
10080//------------------------------------------------------------------------------
10081
10082XMFINLINE XMVECTOR XMVector4ReciprocalLengthEst
10083(
10084    FXMVECTOR V
10085)
10086{
10087#if defined(_XM_NO_INTRINSICS_)
10088
10089    XMVECTOR Result;
10090
10091    Result = XMVector4LengthSq(V);
10092    Result = XMVectorReciprocalSqrtEst(Result);
10093
10094    return Result;
10095
10096#elif defined(_XM_SSE_INTRINSICS_)
10097    // Perform the dot product on x,y,z and w
10098    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
10099    // vTemp has z and w
10100    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
10101    // x+z, y+w
10102    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10103    // x+z,x+z,x+z,y+w
10104    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
10105    // ??,??,y+w,y+w
10106    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
10107    // ??,??,x+z+y+w,??
10108    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10109    // Splat the length
10110	vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
10111    // Get the reciprocal
10112    vLengthSq = _mm_rsqrt_ps(vLengthSq);
10113    return vLengthSq;
10114#else // _XM_VMX128_INTRINSICS_
10115#endif // _XM_VMX128_INTRINSICS_
10116}
10117
10118//------------------------------------------------------------------------------
10119
10120XMFINLINE XMVECTOR XMVector4ReciprocalLength
10121(
10122    FXMVECTOR V
10123)
10124{
10125#if defined(_XM_NO_INTRINSICS_)
10126
10127    XMVECTOR Result;
10128
10129    Result = XMVector4LengthSq(V);
10130    Result = XMVectorReciprocalSqrt(Result);
10131
10132    return Result;
10133
10134#elif defined(_XM_SSE_INTRINSICS_)
10135    // Perform the dot product on x,y,z and w
10136    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
10137    // vTemp has z and w
10138    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
10139    // x+z, y+w
10140    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10141    // x+z,x+z,x+z,y+w
10142    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
10143    // ??,??,y+w,y+w
10144    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
10145    // ??,??,x+z+y+w,??
10146    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10147    // Splat the length
10148	vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
10149    // Get the reciprocal
10150    vLengthSq = _mm_sqrt_ps(vLengthSq);
10151    // Accurate!
10152    vLengthSq = _mm_div_ps(g_XMOne,vLengthSq);
10153    return vLengthSq;
10154#else // _XM_VMX128_INTRINSICS_
10155#endif // _XM_VMX128_INTRINSICS_
10156}
10157
10158//------------------------------------------------------------------------------
10159
10160XMFINLINE XMVECTOR XMVector4LengthEst
10161(
10162    FXMVECTOR V
10163)
10164{
10165#if defined(_XM_NO_INTRINSICS_)
10166
10167    XMVECTOR Result;
10168
10169    Result = XMVector4LengthSq(V);
10170    Result = XMVectorSqrtEst(Result);
10171
10172    return Result;
10173
10174#elif defined(_XM_SSE_INTRINSICS_)
10175    // Perform the dot product on x,y,z and w
10176    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
10177    // vTemp has z and w
10178    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
10179    // x+z, y+w
10180    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10181    // x+z,x+z,x+z,y+w
10182    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
10183    // ??,??,y+w,y+w
10184    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
10185    // ??,??,x+z+y+w,??
10186    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10187    // Splat the length
10188	vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
10189    // Prepare for the division
10190    vLengthSq = _mm_sqrt_ps(vLengthSq);
10191    return vLengthSq;
10192#else // _XM_VMX128_INTRINSICS_
10193#endif // _XM_VMX128_INTRINSICS_
10194}
10195
10196//------------------------------------------------------------------------------
10197
10198XMFINLINE XMVECTOR XMVector4Length
10199(
10200    FXMVECTOR V
10201)
10202{
10203#if defined(_XM_NO_INTRINSICS_)
10204
10205    XMVECTOR Result;
10206
10207    Result = XMVector4LengthSq(V);
10208    Result = XMVectorSqrt(Result);
10209
10210    return Result;
10211
10212#elif defined(_XM_SSE_INTRINSICS_)
10213    // Perform the dot product on x,y,z and w
10214    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
10215    // vTemp has z and w
10216    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
10217    // x+z, y+w
10218    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10219    // x+z,x+z,x+z,y+w
10220    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
10221    // ??,??,y+w,y+w
10222    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
10223    // ??,??,x+z+y+w,??
10224    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10225    // Splat the length
10226	vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
10227    // Prepare for the division
10228    vLengthSq = _mm_sqrt_ps(vLengthSq);
10229    return vLengthSq;
10230#else // _XM_VMX128_INTRINSICS_
10231#endif // _XM_VMX128_INTRINSICS_
10232}
10233
10234//------------------------------------------------------------------------------
10235// XMVector4NormalizeEst uses a reciprocal estimate and
10236// returns QNaN on zero and infinite vectors.
10237
10238XMFINLINE XMVECTOR XMVector4NormalizeEst
10239(
10240    FXMVECTOR V
10241)
10242{
10243#if defined(_XM_NO_INTRINSICS_)
10244
10245    XMVECTOR Result;
10246    Result = XMVector4ReciprocalLength(V);
10247    Result = XMVectorMultiply(V, Result);
10248    return Result;
10249
10250#elif defined(_XM_SSE_INTRINSICS_)
10251    // Perform the dot product on x,y,z and w
10252    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
10253    // vTemp has z and w
10254    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
10255    // x+z, y+w
10256    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10257    // x+z,x+z,x+z,y+w
10258    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
10259    // ??,??,y+w,y+w
10260    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
10261    // ??,??,x+z+y+w,??
10262    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10263    // Splat the length
10264	vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
10265    // Get the reciprocal
10266    XMVECTOR vResult = _mm_rsqrt_ps(vLengthSq);
10267    // Reciprocal mul to perform the normalization
10268    vResult = _mm_mul_ps(vResult,V);
10269    return vResult;
10270#else // _XM_VMX128_INTRINSICS_
10271#endif // _XM_VMX128_INTRINSICS_
10272}
10273
10274//------------------------------------------------------------------------------
10275
10276XMFINLINE XMVECTOR XMVector4Normalize
10277(
10278    FXMVECTOR V
10279)
10280{
10281#if defined(_XM_NO_INTRINSICS_)
10282    FLOAT fLength;
10283    XMVECTOR vResult;
10284
10285    vResult = XMVector4Length( V );
10286    fLength = vResult.vector4_f32[0];
10287
10288    // Prevent divide by zero
10289    if (fLength > 0) {
10290        fLength = 1.0f/fLength;
10291    }
10292
10293    vResult.vector4_f32[0] = V.vector4_f32[0]*fLength;
10294    vResult.vector4_f32[1] = V.vector4_f32[1]*fLength;
10295    vResult.vector4_f32[2] = V.vector4_f32[2]*fLength;
10296    vResult.vector4_f32[3] = V.vector4_f32[3]*fLength;
10297    return vResult;
10298
10299#elif defined(_XM_SSE_INTRINSICS_)
10300    // Perform the dot product on x,y,z and w
10301    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
10302    // vTemp has z and w
10303    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
10304    // x+z, y+w
10305    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10306    // x+z,x+z,x+z,y+w
10307    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
10308    // ??,??,y+w,y+w
10309    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
10310    // ??,??,x+z+y+w,??
10311    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10312    // Splat the length
10313	vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
10314    // Prepare for the division
10315    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
10316    // Create zero with a single instruction
10317    XMVECTOR vZeroMask = _mm_setzero_ps();
10318    // Test for a divide by zero (Must be FP to detect -0.0)
10319    vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
10320    // Failsafe on zero (Or epsilon) length planes
10321    // If the length is infinity, set the elements to zero
10322    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
10323    // Divide to perform the normalization
10324    vResult = _mm_div_ps(V,vResult);
10325    // Any that are infinity, set to zero
10326    vResult = _mm_and_ps(vResult,vZeroMask);
10327    // Select qnan or result based on infinite length
10328	XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
10329    XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
10330    vResult = _mm_or_ps(vTemp1,vTemp2);
10331    return vResult;
10332#else // _XM_VMX128_INTRINSICS_
10333#endif // _XM_VMX128_INTRINSICS_
10334}
10335
10336//------------------------------------------------------------------------------
10337
10338XMFINLINE XMVECTOR XMVector4ClampLength
10339(
10340    FXMVECTOR V,
10341    FLOAT    LengthMin,
10342    FLOAT    LengthMax
10343)
10344{
10345#if defined(_XM_NO_INTRINSICS_)
10346
10347    XMVECTOR ClampMax;
10348    XMVECTOR ClampMin;
10349
10350    ClampMax = XMVectorReplicate(LengthMax);
10351    ClampMin = XMVectorReplicate(LengthMin);
10352
10353    return XMVector4ClampLengthV(V, ClampMin, ClampMax);
10354
10355#elif defined(_XM_SSE_INTRINSICS_)
10356    XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
10357    XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
10358    return XMVector4ClampLengthV(V, ClampMin, ClampMax);
10359#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
10360#endif // _XM_VMX128_INTRINSICS_
10361}
10362
10363//------------------------------------------------------------------------------
10364
10365XMFINLINE XMVECTOR XMVector4ClampLengthV
10366(
10367    FXMVECTOR V,
10368    FXMVECTOR LengthMin,
10369    FXMVECTOR LengthMax
10370)
10371{
10372#if defined(_XM_NO_INTRINSICS_)
10373
10374    XMVECTOR ClampLength;
10375    XMVECTOR LengthSq;
10376    XMVECTOR RcpLength;
10377    XMVECTOR Length;
10378    XMVECTOR Normal;
10379    XMVECTOR Zero;
10380    XMVECTOR InfiniteLength;
10381    XMVECTOR ZeroLength;
10382    XMVECTOR Select;
10383    XMVECTOR ControlMax;
10384    XMVECTOR ControlMin;
10385    XMVECTOR Control;
10386    XMVECTOR Result;
10387
10388    XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[2] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[3] == LengthMin.vector4_f32[0]));
10389    XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[2] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[3] == LengthMax.vector4_f32[0]));
10390    XMASSERT(XMVector4GreaterOrEqual(LengthMin, XMVectorZero()));
10391    XMASSERT(XMVector4GreaterOrEqual(LengthMax, XMVectorZero()));
10392    XMASSERT(XMVector4GreaterOrEqual(LengthMax, LengthMin));
10393
10394    LengthSq = XMVector4LengthSq(V);
10395
10396    Zero = XMVectorZero();
10397
10398    RcpLength = XMVectorReciprocalSqrt(LengthSq);
10399
10400    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
10401    ZeroLength = XMVectorEqual(LengthSq, Zero);
10402
10403    Normal = XMVectorMultiply(V, RcpLength);
10404
10405    Length = XMVectorMultiply(LengthSq, RcpLength);
10406
10407    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
10408    Length = XMVectorSelect(LengthSq, Length, Select);
10409    Normal = XMVectorSelect(LengthSq, Normal, Select);
10410
10411    ControlMax = XMVectorGreater(Length, LengthMax);
10412    ControlMin = XMVectorLess(Length, LengthMin);
10413
10414    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
10415    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
10416
10417    Result = XMVectorMultiply(Normal, ClampLength);
10418
10419    // Preserve the original vector (with no precision loss) if the length falls within the given range
10420    Control = XMVectorEqualInt(ControlMax, ControlMin);
10421    Result = XMVectorSelect(Result, V, Control);
10422
10423    return Result;
10424
10425#elif defined(_XM_SSE_INTRINSICS_)
10426    XMVECTOR ClampLength;
10427    XMVECTOR LengthSq;
10428    XMVECTOR RcpLength;
10429    XMVECTOR Length;
10430    XMVECTOR Normal;
10431    XMVECTOR Zero;
10432    XMVECTOR InfiniteLength;
10433    XMVECTOR ZeroLength;
10434    XMVECTOR Select;
10435    XMVECTOR ControlMax;
10436    XMVECTOR ControlMin;
10437    XMVECTOR Control;
10438    XMVECTOR Result;
10439
10440    XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetW(LengthMin) == XMVectorGetX(LengthMin)));
10441    XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetW(LengthMax) == XMVectorGetX(LengthMax)));
10442    XMASSERT(XMVector4GreaterOrEqual(LengthMin, g_XMZero));
10443    XMASSERT(XMVector4GreaterOrEqual(LengthMax, g_XMZero));
10444    XMASSERT(XMVector4GreaterOrEqual(LengthMax, LengthMin));
10445
10446    LengthSq = XMVector4LengthSq(V);
10447    Zero = XMVectorZero();
10448    RcpLength = XMVectorReciprocalSqrt(LengthSq);
10449    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
10450    ZeroLength = XMVectorEqual(LengthSq, Zero);
10451    Normal = _mm_mul_ps(V, RcpLength);
10452    Length = _mm_mul_ps(LengthSq, RcpLength);
10453    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
10454    Length = XMVectorSelect(LengthSq, Length, Select);
10455    Normal = XMVectorSelect(LengthSq, Normal, Select);
10456    ControlMax = XMVectorGreater(Length, LengthMax);
10457    ControlMin = XMVectorLess(Length, LengthMin);
10458    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
10459    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
10460    Result = _mm_mul_ps(Normal, ClampLength);
10461    // Preserve the original vector (with no precision loss) if the length falls within the given range
10462    Control = XMVectorEqualInt(ControlMax,ControlMin);
10463    Result = XMVectorSelect(Result,V,Control);
10464    return Result;
10465
10466#else // _XM_VMX128_INTRINSICS_
10467#endif // _XM_VMX128_INTRINSICS_
10468}
10469
10470//------------------------------------------------------------------------------
10471
10472XMFINLINE XMVECTOR XMVector4Reflect
10473(
10474    FXMVECTOR Incident,
10475    FXMVECTOR Normal
10476)
10477{
10478#if defined(_XM_NO_INTRINSICS_)
10479
10480    XMVECTOR Result;
10481
10482    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
10483    Result = XMVector4Dot(Incident, Normal);
10484    Result = XMVectorAdd(Result, Result);
10485    Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);
10486
10487    return Result;
10488
10489#elif defined(_XM_SSE_INTRINSICS_)
10490    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
10491    XMVECTOR Result = XMVector4Dot(Incident,Normal);
10492    Result = _mm_add_ps(Result,Result);
10493    Result = _mm_mul_ps(Result,Normal);
10494    Result = _mm_sub_ps(Incident,Result);
10495    return Result;
10496#else // _XM_VMX128_INTRINSICS_
10497#endif // _XM_VMX128_INTRINSICS_
10498}
10499
10500//------------------------------------------------------------------------------
10501
10502XMFINLINE XMVECTOR XMVector4Refract
10503(
10504    FXMVECTOR Incident,
10505    FXMVECTOR Normal,
10506    FLOAT    RefractionIndex
10507)
10508{
10509#if defined(_XM_NO_INTRINSICS_)
10510
10511    XMVECTOR Index;
10512    Index = XMVectorReplicate(RefractionIndex);
10513    return XMVector4RefractV(Incident, Normal, Index);
10514
10515#elif defined(_XM_SSE_INTRINSICS_)
10516    XMVECTOR Index = _mm_set_ps1(RefractionIndex);
10517    return XMVector4RefractV(Incident,Normal,Index);
10518#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
10519#endif // _XM_VMX128_INTRINSICS_
10520}
10521
10522//------------------------------------------------------------------------------
10523
10524XMFINLINE XMVECTOR XMVector4RefractV
10525(
10526    FXMVECTOR Incident,
10527    FXMVECTOR Normal,
10528    FXMVECTOR RefractionIndex
10529)
10530{
10531#if defined(_XM_NO_INTRINSICS_)
10532
10533    XMVECTOR        IDotN;
10534    XMVECTOR        R;
10535    CONST XMVECTOR  Zero = XMVectorZero();
10536
10537    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
10538    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
10539
10540    IDotN = XMVector4Dot(Incident, Normal);
10541
10542    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
10543    R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v);
10544    R = XMVectorMultiply(R, RefractionIndex);
10545    R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v);
10546
10547    if (XMVector4LessOrEqual(R, Zero))
10548    {
10549        // Total internal reflection
10550        return Zero;
10551    }
10552    else
10553    {
10554        XMVECTOR Result;
10555
10556        // R = RefractionIndex * IDotN + sqrt(R)
10557        R = XMVectorSqrt(R);
10558        R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R);
10559
10560        // Result = RefractionIndex * Incident - Normal * R
10561        Result = XMVectorMultiply(RefractionIndex, Incident);
10562        Result = XMVectorNegativeMultiplySubtract(Normal, R, Result);
10563
10564        return Result;
10565    }
10566
10567#elif defined(_XM_SSE_INTRINSICS_)
10568    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
10569    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
10570
10571    XMVECTOR IDotN = XMVector4Dot(Incident,Normal);
10572
10573    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
10574    XMVECTOR R = _mm_mul_ps(IDotN,IDotN);
10575    R = _mm_sub_ps(g_XMOne,R);
10576    R = _mm_mul_ps(R, RefractionIndex);
10577    R = _mm_mul_ps(R, RefractionIndex);
10578    R = _mm_sub_ps(g_XMOne,R);
10579
10580    XMVECTOR vResult = _mm_cmple_ps(R,g_XMZero);
10581    if (_mm_movemask_ps(vResult)==0x0f)
10582    {
10583        // Total internal reflection
10584        vResult = g_XMZero;
10585    }
10586    else
10587    {
10588        // R = RefractionIndex * IDotN + sqrt(R)
10589        R = _mm_sqrt_ps(R);
10590        vResult = _mm_mul_ps(RefractionIndex, IDotN);
10591        R = _mm_add_ps(R,vResult);
10592        // Result = RefractionIndex * Incident - Normal * R
10593        vResult = _mm_mul_ps(RefractionIndex, Incident);
10594        R = _mm_mul_ps(R,Normal);
10595        vResult = _mm_sub_ps(vResult,R);
10596    }
10597    return vResult;
10598#else // _XM_VMX128_INTRINSICS_
10599#endif // _XM_VMX128_INTRINSICS_
10600}
10601
10602//------------------------------------------------------------------------------
10603
10604XMFINLINE XMVECTOR XMVector4Orthogonal
10605(
10606    FXMVECTOR V
10607)
10608{
10609#if defined(_XM_NO_INTRINSICS_)
10610
10611    XMVECTOR Result;
10612    Result.vector4_f32[0] = V.vector4_f32[2];
10613    Result.vector4_f32[1] = V.vector4_f32[3];
10614    Result.vector4_f32[2] = -V.vector4_f32[0];
10615    Result.vector4_f32[3] = -V.vector4_f32[1];
10616    return Result;
10617
10618#elif defined(_XM_SSE_INTRINSICS_)
10619    static const XMVECTORF32 FlipZW = {1.0f,1.0f,-1.0f,-1.0f};
10620    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,0,3,2));
10621    vResult = _mm_mul_ps(vResult,FlipZW);
10622    return vResult;
10623#else // _XM_VMX128_INTRINSICS_
10624#endif // _XM_VMX128_INTRINSICS_
10625}
10626
10627//------------------------------------------------------------------------------
10628
10629XMFINLINE XMVECTOR XMVector4AngleBetweenNormalsEst
10630(
10631    FXMVECTOR N1,
10632    FXMVECTOR N2
10633)
10634{
10635#if defined(_XM_NO_INTRINSICS_)
10636
10637    XMVECTOR NegativeOne;
10638    XMVECTOR One;
10639    XMVECTOR Result;
10640
10641    Result = XMVector4Dot(N1, N2);
10642    NegativeOne = XMVectorSplatConstant(-1, 0);
10643    One = XMVectorSplatOne();
10644    Result = XMVectorClamp(Result, NegativeOne, One);
10645    Result = XMVectorACosEst(Result);
10646
10647    return Result;
10648
10649#elif defined(_XM_SSE_INTRINSICS_)
10650    XMVECTOR vResult = XMVector4Dot(N1,N2);
10651    // Clamp to -1.0f to 1.0f
10652    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
10653    vResult = _mm_min_ps(vResult,g_XMOne);;
10654    vResult = XMVectorACosEst(vResult);
10655    return vResult;
10656#else // _XM_VMX128_INTRINSICS_
10657#endif // _XM_VMX128_INTRINSICS_
10658}
10659
10660//------------------------------------------------------------------------------
10661
10662XMFINLINE XMVECTOR XMVector4AngleBetweenNormals
10663(
10664    FXMVECTOR N1,
10665    FXMVECTOR N2
10666)
10667{
10668#if defined(_XM_NO_INTRINSICS_)
10669
10670    XMVECTOR NegativeOne;
10671    XMVECTOR One;
10672    XMVECTOR Result;
10673
10674    Result = XMVector4Dot(N1, N2);
10675    NegativeOne = XMVectorSplatConstant(-1, 0);
10676    One = XMVectorSplatOne();
10677    Result = XMVectorClamp(Result, NegativeOne, One);
10678    Result = XMVectorACos(Result);
10679
10680    return Result;
10681
10682#elif defined(_XM_SSE_INTRINSICS_)
10683    XMVECTOR vResult = XMVector4Dot(N1,N2);
10684    // Clamp to -1.0f to 1.0f
10685    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
10686    vResult = _mm_min_ps(vResult,g_XMOne);;
10687    vResult = XMVectorACos(vResult);
10688    return vResult;
10689#else // _XM_VMX128_INTRINSICS_
10690#endif // _XM_VMX128_INTRINSICS_
10691}
10692
10693//------------------------------------------------------------------------------
10694
10695XMFINLINE XMVECTOR XMVector4AngleBetweenVectors
10696(
10697    FXMVECTOR V1,
10698    FXMVECTOR V2
10699)
10700{
10701#if defined(_XM_NO_INTRINSICS_)
10702
10703    XMVECTOR L1;
10704    XMVECTOR L2;
10705    XMVECTOR Dot;
10706    XMVECTOR CosAngle;
10707    XMVECTOR NegativeOne;
10708    XMVECTOR One;
10709    XMVECTOR Result;
10710
10711    L1 = XMVector4ReciprocalLength(V1);
10712    L2 = XMVector4ReciprocalLength(V2);
10713
10714    Dot = XMVector4Dot(V1, V2);
10715
10716    L1 = XMVectorMultiply(L1, L2);
10717
10718    CosAngle = XMVectorMultiply(Dot, L1);
10719    NegativeOne = XMVectorSplatConstant(-1, 0);
10720    One = XMVectorSplatOne();
10721    CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);
10722
10723    Result = XMVectorACos(CosAngle);
10724
10725    return Result;
10726
10727#elif defined(_XM_SSE_INTRINSICS_)
10728    XMVECTOR L1;
10729    XMVECTOR L2;
10730    XMVECTOR Dot;
10731    XMVECTOR CosAngle;
10732    XMVECTOR Result;
10733
10734    L1 = XMVector4ReciprocalLength(V1);
10735    L2 = XMVector4ReciprocalLength(V2);
10736    Dot = XMVector4Dot(V1, V2);
10737    L1 = _mm_mul_ps(L1,L2);
10738    CosAngle = _mm_mul_ps(Dot,L1);
10739    CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne, g_XMOne);
10740    Result = XMVectorACos(CosAngle);
10741    return Result;
10742
10743#else // _XM_VMX128_INTRINSICS_
10744#endif // _XM_VMX128_INTRINSICS_
10745}
10746
10747//------------------------------------------------------------------------------
10748
10749XMFINLINE XMVECTOR XMVector4Transform
10750(
10751    FXMVECTOR V,
10752    CXMMATRIX M
10753)
10754{
10755#if defined(_XM_NO_INTRINSICS_)
10756    FLOAT fX = (M.m[0][0]*V.vector4_f32[0])+(M.m[1][0]*V.vector4_f32[1])+(M.m[2][0]*V.vector4_f32[2])+(M.m[3][0]*V.vector4_f32[3]);
10757    FLOAT fY = (M.m[0][1]*V.vector4_f32[0])+(M.m[1][1]*V.vector4_f32[1])+(M.m[2][1]*V.vector4_f32[2])+(M.m[3][1]*V.vector4_f32[3]);
10758    FLOAT fZ = (M.m[0][2]*V.vector4_f32[0])+(M.m[1][2]*V.vector4_f32[1])+(M.m[2][2]*V.vector4_f32[2])+(M.m[3][2]*V.vector4_f32[3]);
10759    FLOAT fW = (M.m[0][3]*V.vector4_f32[0])+(M.m[1][3]*V.vector4_f32[1])+(M.m[2][3]*V.vector4_f32[2])+(M.m[3][3]*V.vector4_f32[3]);
10760    XMVECTOR vResult = {
10761        fX,
10762        fY,
10763        fZ,
10764        fW
10765    };
10766    return vResult;
10767
10768#elif defined(_XM_SSE_INTRINSICS_)
10769    // Splat x,y,z and w
10770    XMVECTOR vTempX = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
10771    XMVECTOR vTempY = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
10772    XMVECTOR vTempZ = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
10773    XMVECTOR vTempW = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
10774    // Mul by the matrix
10775    vTempX = _mm_mul_ps(vTempX,M.r[0]);
10776    vTempY = _mm_mul_ps(vTempY,M.r[1]);
10777    vTempZ = _mm_mul_ps(vTempZ,M.r[2]);
10778    vTempW = _mm_mul_ps(vTempW,M.r[3]);
10779    // Add them all together
10780    vTempX = _mm_add_ps(vTempX,vTempY);
10781    vTempZ = _mm_add_ps(vTempZ,vTempW);
10782    vTempX = _mm_add_ps(vTempX,vTempZ);
10783    return vTempX;
10784#else // _XM_VMX128_INTRINSICS_
10785#endif // _XM_VMX128_INTRINSICS_
10786}
10787
10788//------------------------------------------------------------------------------
10789
10790XMINLINE XMFLOAT4* XMVector4TransformStream
10791(
10792    XMFLOAT4*       pOutputStream,
10793    UINT            OutputStride,
10794    CONST XMFLOAT4* pInputStream,
10795    UINT            InputStride,
10796    UINT            VectorCount,
10797    CXMMATRIX     M
10798)
10799{
10800#if defined(_XM_NO_INTRINSICS_)
10801
10802    XMVECTOR V;
10803    XMVECTOR X;
10804    XMVECTOR Y;
10805    XMVECTOR Z;
10806    XMVECTOR W;
10807    XMVECTOR Result;
10808    UINT     i;
10809    BYTE*    pInputVector = (BYTE*)pInputStream;
10810    BYTE*    pOutputVector = (BYTE*)pOutputStream;
10811
10812    XMASSERT(pOutputStream);
10813    XMASSERT(pInputStream);
10814
10815    for (i = 0; i < VectorCount; i++)
10816    {
10817        V = XMLoadFloat4((XMFLOAT4*)pInputVector);
10818        W = XMVectorSplatW(V);
10819        Z = XMVectorSplatZ(V);
10820        Y = XMVectorSplatY(V);
10821        X = XMVectorSplatX(V);
10822//        W = XMVectorReplicate(((XMFLOAT4*)pInputVector)->w);
10823//        Z = XMVectorReplicate(((XMFLOAT4*)pInputVector)->z);
10824//        Y = XMVectorReplicate(((XMFLOAT4*)pInputVector)->y);
10825//        X = XMVectorReplicate(((XMFLOAT4*)pInputVector)->x);
10826
10827        Result = XMVectorMultiply(W, M.r[3]);
10828        Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
10829        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
10830        Result = XMVectorMultiplyAdd(X, M.r[0], Result);
10831
10832        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);
10833
10834        pInputVector += InputStride;
10835        pOutputVector += OutputStride;
10836    }
10837
10838    return pOutputStream;
10839
10840#elif defined(_XM_SSE_INTRINSICS_)
10841    UINT i;
10842
10843    XMASSERT(pOutputStream);
10844    XMASSERT(pInputStream);
10845
10846    const BYTE*pInputVector = reinterpret_cast<const BYTE *>(pInputStream);
10847    BYTE* pOutputVector = reinterpret_cast<BYTE *>(pOutputStream);
10848    for (i = 0; i < VectorCount; i++)
10849    {
10850        // Fetch the row and splat it
10851        XMVECTOR vTempx = _mm_loadu_ps(reinterpret_cast<const float *>(pInputVector));
10852        XMVECTOR vTempy = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(1,1,1,1));
10853        XMVECTOR vTempz = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(2,2,2,2));
10854        XMVECTOR vTempw = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(3,3,3,3));
10855        vTempx = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(0,0,0,0));
10856        vTempx = _mm_mul_ps(vTempx,M.r[0]);
10857        vTempy = _mm_mul_ps(vTempy,M.r[1]);
10858        vTempz = _mm_mul_ps(vTempz,M.r[2]);
10859        vTempw = _mm_mul_ps(vTempw,M.r[3]);
10860        vTempx = _mm_add_ps(vTempx,vTempy);
10861        vTempw = _mm_add_ps(vTempw,vTempz);
10862        vTempw = _mm_add_ps(vTempw,vTempx);
10863        // Store the transformed vector
10864        _mm_storeu_ps(reinterpret_cast<float *>(pOutputVector),vTempw);
10865
10866        pInputVector += InputStride;
10867        pOutputVector += OutputStride;
10868    }
10869    return pOutputStream;
10870#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
10871#endif // _XM_VMX128_INTRINSICS_
10872}
10873
10874#ifdef __cplusplus
10875
10876/****************************************************************************
10877 *
10878 * XMVECTOR operators
10879 *
10880 ****************************************************************************/
10881
10882#ifndef XM_NO_OPERATOR_OVERLOADS
10883
10884//------------------------------------------------------------------------------
10885
10886XMFINLINE XMVECTOR operator+ (FXMVECTOR V)
10887{
10888    return V;
10889}
10890
10891//------------------------------------------------------------------------------
10892
10893XMFINLINE XMVECTOR operator- (FXMVECTOR V)
10894{
10895    return XMVectorNegate(V);
10896}
10897
10898//------------------------------------------------------------------------------
10899
10900XMFINLINE XMVECTOR& operator+=
10901(
10902    XMVECTOR&       V1,
10903    FXMVECTOR       V2
10904)
10905{
10906    V1 = XMVectorAdd(V1, V2);
10907    return V1;
10908}
10909
10910//------------------------------------------------------------------------------
10911
10912XMFINLINE XMVECTOR& operator-=
10913(
10914    XMVECTOR&       V1,
10915    FXMVECTOR       V2
10916)
10917{
10918    V1 = XMVectorSubtract(V1, V2);
10919    return V1;
10920}
10921
10922//------------------------------------------------------------------------------
10923
10924XMFINLINE XMVECTOR& operator*=
10925(
10926    XMVECTOR&       V1,
10927    FXMVECTOR       V2
10928)
10929{
10930    V1 = XMVectorMultiply(V1, V2);
10931    return V1;
10932}
10933
10934//------------------------------------------------------------------------------
10935
10936XMFINLINE XMVECTOR& operator/=
10937(
10938    XMVECTOR&       V1,
10939    FXMVECTOR       V2
10940)
10941{
10942    V1 = XMVectorDivide(V1,V2);
10943    return V1;
10944}
10945
10946//------------------------------------------------------------------------------
10947
10948XMFINLINE XMVECTOR& operator*=
10949(
10950    XMVECTOR&   V,
10951    CONST FLOAT S
10952)
10953{
10954    V = XMVectorScale(V, S);
10955    return V;
10956}
10957
10958//------------------------------------------------------------------------------
10959
10960XMFINLINE XMVECTOR& operator/=
10961(
10962    XMVECTOR&   V,
10963    CONST FLOAT S
10964)
10965{
10966    V = XMVectorScale(V, 1.0f / S);
10967    return V;
10968}
10969
10970//------------------------------------------------------------------------------
10971
10972XMFINLINE XMVECTOR operator+
10973(
10974    FXMVECTOR V1,
10975    FXMVECTOR V2
10976)
10977{
10978    return XMVectorAdd(V1, V2);
10979}
10980
10981//------------------------------------------------------------------------------
10982
10983XMFINLINE XMVECTOR operator-
10984(
10985    FXMVECTOR V1,
10986    FXMVECTOR V2
10987)
10988{
10989    return XMVectorSubtract(V1, V2);
10990}
10991
10992//------------------------------------------------------------------------------
10993
10994XMFINLINE XMVECTOR operator*
10995(
10996    FXMVECTOR V1,
10997    FXMVECTOR V2
10998)
10999{
11000    return XMVectorMultiply(V1, V2);
11001}
11002
11003//------------------------------------------------------------------------------
11004
11005XMFINLINE XMVECTOR operator/
11006(
11007    FXMVECTOR V1,
11008    FXMVECTOR V2
11009)
11010{
11011    return XMVectorDivide(V1,V2);
11012}
11013
11014//------------------------------------------------------------------------------
11015
11016XMFINLINE XMVECTOR operator*
11017(
11018    FXMVECTOR      V,
11019    CONST FLOAT    S
11020)
11021{
11022    return XMVectorScale(V, S);
11023}
11024
11025//------------------------------------------------------------------------------
11026
11027XMFINLINE XMVECTOR operator/
11028(
11029    FXMVECTOR      V,
11030    CONST FLOAT    S
11031)
11032{
11033    return XMVectorScale(V, 1.0f / S);
11034}
11035
11036//------------------------------------------------------------------------------
11037
11038XMFINLINE XMVECTOR operator*
11039(
11040    FLOAT           S,
11041    FXMVECTOR  	    V
11042)
11043{
11044    return XMVectorScale(V, S);
11045}
11046
11047#endif // !XM_NO_OPERATOR_OVERLOADS
11048
11049/****************************************************************************
11050 *
11051 * XMFLOAT2 operators
11052 *
11053 ****************************************************************************/
11054
11055//------------------------------------------------------------------------------
11056
11057XMFINLINE _XMFLOAT2::_XMFLOAT2
11058(
11059    CONST FLOAT* pArray
11060)
11061{
11062    x = pArray[0];
11063    y = pArray[1];
11064}
11065
11066//------------------------------------------------------------------------------
11067
11068XMFINLINE _XMFLOAT2& _XMFLOAT2::operator=
11069(
11070    CONST _XMFLOAT2& Float2
11071)
11072{
11073    x = Float2.x;
11074    y = Float2.y;
11075    return *this;
11076}
11077
11078//------------------------------------------------------------------------------
11079
11080XMFINLINE XMFLOAT2A& XMFLOAT2A::operator=
11081(
11082    CONST XMFLOAT2A& Float2
11083)
11084{
11085    x = Float2.x;
11086    y = Float2.y;
11087    return *this;
11088}
11089
11090/****************************************************************************
11091 *
11092 * XMHALF2 operators
11093 *
11094 ****************************************************************************/
11095
11096//------------------------------------------------------------------------------
11097
11098XMFINLINE _XMHALF2::_XMHALF2
11099(
11100    CONST HALF* pArray
11101)
11102{
11103    x = pArray[0];
11104    y = pArray[1];
11105}
11106
11107//------------------------------------------------------------------------------
11108
11109XMFINLINE _XMHALF2::_XMHALF2
11110(
11111    FLOAT _x,
11112    FLOAT _y
11113)
11114{
11115    x = XMConvertFloatToHalf(_x);
11116    y = XMConvertFloatToHalf(_y);
11117}
11118
11119//------------------------------------------------------------------------------
11120
11121XMFINLINE _XMHALF2::_XMHALF2
11122(
11123    CONST FLOAT* pArray
11124)
11125{
11126    x = XMConvertFloatToHalf(pArray[0]);
11127    y = XMConvertFloatToHalf(pArray[1]);
11128}
11129
11130//------------------------------------------------------------------------------
11131
11132XMFINLINE _XMHALF2& _XMHALF2::operator=
11133(
11134    CONST _XMHALF2& Half2
11135)
11136{
11137    x = Half2.x;
11138    y = Half2.y;
11139    return *this;
11140}
11141
11142/****************************************************************************
11143 *
11144 * XMSHORTN2 operators
11145 *
11146 ****************************************************************************/
11147
11148//------------------------------------------------------------------------------
11149
11150XMFINLINE _XMSHORTN2::_XMSHORTN2
11151(
11152    CONST SHORT* pArray
11153)
11154{
11155    x = pArray[0];
11156    y = pArray[1];
11157}
11158
11159//------------------------------------------------------------------------------
11160
11161XMFINLINE _XMSHORTN2::_XMSHORTN2
11162(
11163    FLOAT _x,
11164    FLOAT _y
11165)
11166{
11167    XMStoreShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
11168}
11169
11170//------------------------------------------------------------------------------
11171
11172XMFINLINE _XMSHORTN2::_XMSHORTN2
11173(
11174    CONST FLOAT* pArray
11175)
11176{
11177    XMStoreShortN2(this, XMLoadFloat2((XMFLOAT2*)pArray));
11178}
11179
11180//------------------------------------------------------------------------------
11181
11182XMFINLINE _XMSHORTN2& _XMSHORTN2::operator=
11183(
11184    CONST _XMSHORTN2& ShortN2
11185)
11186{
11187    x = ShortN2.x;
11188    y = ShortN2.y;
11189    return *this;
11190}
11191
11192/****************************************************************************
11193 *
11194 * XMSHORT2 operators
11195 *
11196 ****************************************************************************/
11197
11198//------------------------------------------------------------------------------
11199
11200XMFINLINE _XMSHORT2::_XMSHORT2
11201(
11202    CONST SHORT* pArray
11203)
11204{
11205    x = pArray[0];
11206    y = pArray[1];
11207}
11208
11209//------------------------------------------------------------------------------
11210
11211XMFINLINE _XMSHORT2::_XMSHORT2
11212(
11213    FLOAT _x,
11214    FLOAT _y
11215)
11216{
11217    XMStoreShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
11218}
11219
11220//------------------------------------------------------------------------------
11221
11222XMFINLINE _XMSHORT2::_XMSHORT2
11223(
11224    CONST FLOAT* pArray
11225)
11226{
11227    XMStoreShort2(this, XMLoadFloat2((XMFLOAT2*)pArray));
11228}
11229
11230//------------------------------------------------------------------------------
11231
11232XMFINLINE _XMSHORT2& _XMSHORT2::operator=
11233(
11234    CONST _XMSHORT2& Short2
11235)
11236{
11237    x = Short2.x;
11238    y = Short2.y;
11239    return *this;
11240}
11241
11242/****************************************************************************
11243 *
11244 * XMUSHORTN2 operators
11245 *
11246 ****************************************************************************/
11247
11248//------------------------------------------------------------------------------
11249
11250XMFINLINE _XMUSHORTN2::_XMUSHORTN2
11251(
11252    CONST USHORT* pArray
11253)
11254{
11255    x = pArray[0];
11256    y = pArray[1];
11257}
11258
11259//------------------------------------------------------------------------------
11260
11261XMFINLINE _XMUSHORTN2::_XMUSHORTN2
11262(
11263    FLOAT _x,
11264    FLOAT _y
11265)
11266{
11267    XMStoreUShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
11268}
11269
11270//------------------------------------------------------------------------------
11271
11272XMFINLINE _XMUSHORTN2::_XMUSHORTN2
11273(
11274    CONST FLOAT* pArray
11275)
11276{
11277    XMStoreUShortN2(this, XMLoadFloat2((XMFLOAT2*)pArray));
11278}
11279
11280//------------------------------------------------------------------------------
11281
11282XMFINLINE _XMUSHORTN2& _XMUSHORTN2::operator=
11283(
11284    CONST _XMUSHORTN2& UShortN2
11285)
11286{
11287    x = UShortN2.x;
11288    y = UShortN2.y;
11289    return *this;
11290}
11291
11292/****************************************************************************
11293 *
11294 * XMUSHORT2 operators
11295 *
11296 ****************************************************************************/
11297
11298//------------------------------------------------------------------------------
11299
11300XMFINLINE _XMUSHORT2::_XMUSHORT2
11301(
11302    CONST USHORT* pArray
11303)
11304{
11305    x = pArray[0];
11306    y = pArray[1];
11307}
11308
11309//------------------------------------------------------------------------------
11310
11311XMFINLINE _XMUSHORT2::_XMUSHORT2
11312(
11313    FLOAT _x,
11314    FLOAT _y
11315)
11316{
11317    XMStoreUShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
11318}
11319
11320//------------------------------------------------------------------------------
11321
11322XMFINLINE _XMUSHORT2::_XMUSHORT2
11323(
11324    CONST FLOAT* pArray
11325)
11326{
11327    XMStoreUShort2(this, XMLoadFloat2((XMFLOAT2*)pArray));
11328}
11329
11330//------------------------------------------------------------------------------
11331
11332XMFINLINE _XMUSHORT2& _XMUSHORT2::operator=
11333(
11334    CONST _XMUSHORT2& UShort2
11335)
11336{
11337    x = UShort2.x;
11338    y = UShort2.y;
11339    return *this;
11340}
11341
11342/****************************************************************************
11343 *
11344 * XMFLOAT3 operators
11345 *
11346 ****************************************************************************/
11347
11348//------------------------------------------------------------------------------
11349
11350XMFINLINE _XMFLOAT3::_XMFLOAT3
11351(
11352    CONST FLOAT* pArray
11353)
11354{
11355    x = pArray[0];
11356    y = pArray[1];
11357    z = pArray[2];
11358}
11359
11360//------------------------------------------------------------------------------
11361
11362XMFINLINE _XMFLOAT3& _XMFLOAT3::operator=
11363(
11364    CONST _XMFLOAT3& Float3
11365)
11366{
11367    x = Float3.x;
11368    y = Float3.y;
11369    z = Float3.z;
11370    return *this;
11371}
11372
11373//------------------------------------------------------------------------------
11374
11375XMFINLINE XMFLOAT3A& XMFLOAT3A::operator=
11376(
11377    CONST XMFLOAT3A& Float3
11378)
11379{
11380    x = Float3.x;
11381    y = Float3.y;
11382    z = Float3.z;
11383    return *this;
11384}
11385
11386/****************************************************************************
11387 *
11388 * XMHENDN3 operators
11389 *
11390 ****************************************************************************/
11391
11392//------------------------------------------------------------------------------
11393
11394XMFINLINE _XMHENDN3::_XMHENDN3
11395(
11396    FLOAT _x,
11397    FLOAT _y,
11398    FLOAT _z
11399)
11400{
11401    XMStoreHenDN3(this, XMVectorSet(_x, _y, _z, 0.0f));
11402}
11403
11404//------------------------------------------------------------------------------
11405
11406XMFINLINE _XMHENDN3::_XMHENDN3
11407(
11408    CONST FLOAT* pArray
11409)
11410{
11411    XMStoreHenDN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
11412}
11413
11414//------------------------------------------------------------------------------
11415
11416XMFINLINE _XMHENDN3& _XMHENDN3::operator=
11417(
11418    CONST _XMHENDN3& HenDN3
11419)
11420{
11421    v = HenDN3.v;
11422    return *this;
11423}
11424
11425//------------------------------------------------------------------------------
11426
11427XMFINLINE _XMHENDN3& _XMHENDN3::operator=
11428(
11429    CONST UINT Packed
11430)
11431{
11432    v = Packed;
11433    return *this;
11434}
11435
11436/****************************************************************************
11437 *
11438 * XMHEND3 operators
11439 *
11440 ****************************************************************************/
11441
11442//------------------------------------------------------------------------------
11443
11444XMFINLINE _XMHEND3::_XMHEND3
11445(
11446    FLOAT _x,
11447    FLOAT _y,
11448    FLOAT _z
11449)
11450{
11451    XMStoreHenD3(this, XMVectorSet(_x, _y, _z, 0.0f));
11452}
11453
11454//------------------------------------------------------------------------------
11455
11456XMFINLINE _XMHEND3::_XMHEND3
11457(
11458    CONST FLOAT* pArray
11459)
11460{
11461    XMStoreHenD3(this, XMLoadFloat3((XMFLOAT3*)pArray));
11462}
11463
11464//------------------------------------------------------------------------------
11465
11466XMFINLINE _XMHEND3& _XMHEND3::operator=
11467(
11468    CONST _XMHEND3& HenD3
11469)
11470{
11471    v = HenD3.v;
11472    return *this;
11473}
11474
11475//------------------------------------------------------------------------------
11476
11477XMFINLINE _XMHEND3& _XMHEND3::operator=
11478(
11479    CONST UINT Packed
11480)
11481{
11482    v = Packed;
11483    return *this;
11484}
11485
11486/****************************************************************************
11487 *
11488 * XMUHENDN3 operators
11489 *
11490 ****************************************************************************/
11491
11492//------------------------------------------------------------------------------
11493
11494XMFINLINE _XMUHENDN3::_XMUHENDN3
11495(
11496    FLOAT _x,
11497    FLOAT _y,
11498    FLOAT _z
11499)
11500{
11501    XMStoreUHenDN3(this, XMVectorSet(_x, _y, _z, 0.0f));
11502}
11503
11504//------------------------------------------------------------------------------
11505
11506XMFINLINE _XMUHENDN3::_XMUHENDN3
11507(
11508    CONST FLOAT* pArray
11509)
11510{
11511    XMStoreUHenDN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
11512}
11513
11514//------------------------------------------------------------------------------
11515
11516XMFINLINE _XMUHENDN3& _XMUHENDN3::operator=
11517(
11518    CONST _XMUHENDN3& UHenDN3
11519)
11520{
11521    v = UHenDN3.v;
11522    return *this;
11523}
11524
11525//------------------------------------------------------------------------------
11526
11527XMFINLINE _XMUHENDN3& _XMUHENDN3::operator=
11528(
11529    CONST UINT Packed
11530)
11531{
11532    v = Packed;
11533    return *this;
11534}
11535
11536/****************************************************************************
11537 *
11538 * XMUHEND3 operators
11539 *
11540 ****************************************************************************/
11541
11542//------------------------------------------------------------------------------
11543
11544XMFINLINE _XMUHEND3::_XMUHEND3
11545(
11546    FLOAT _x,
11547    FLOAT _y,
11548    FLOAT _z
11549)
11550{
11551    XMStoreUHenD3(this, XMVectorSet(_x, _y, _z, 0.0f));
11552}
11553
11554//------------------------------------------------------------------------------
11555
11556XMFINLINE _XMUHEND3::_XMUHEND3
11557(
11558    CONST FLOAT* pArray
11559)
11560{
11561    XMStoreUHenD3(this, XMLoadFloat3((XMFLOAT3*)pArray));
11562}
11563
11564//------------------------------------------------------------------------------
11565
11566XMFINLINE _XMUHEND3& _XMUHEND3::operator=
11567(
11568    CONST _XMUHEND3& UHenD3
11569)
11570{
11571    v = UHenD3.v;
11572    return *this;
11573}
11574
11575//------------------------------------------------------------------------------
11576
11577XMFINLINE _XMUHEND3& _XMUHEND3::operator=
11578(
11579    CONST UINT Packed
11580)
11581{
11582    v = Packed;
11583    return *this;
11584}
11585
11586/****************************************************************************
11587 *
11588 * XMDHENN3 operators
11589 *
11590 ****************************************************************************/
11591
11592//------------------------------------------------------------------------------
11593
11594XMFINLINE _XMDHENN3::_XMDHENN3
11595(
11596    FLOAT _x,
11597    FLOAT _y,
11598    FLOAT _z
11599)
11600{
11601    XMStoreDHenN3(this, XMVectorSet(_x, _y, _z, 0.0f));
11602}
11603
11604//------------------------------------------------------------------------------
11605
11606XMFINLINE _XMDHENN3::_XMDHENN3
11607(
11608    CONST FLOAT* pArray
11609)
11610{
11611    XMStoreDHenN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
11612}
11613
11614//------------------------------------------------------------------------------
11615
11616XMFINLINE _XMDHENN3& _XMDHENN3::operator=
11617(
11618    CONST _XMDHENN3& DHenN3
11619)
11620{
11621    v = DHenN3.v;
11622    return *this;
11623}
11624
11625//------------------------------------------------------------------------------
11626
11627XMFINLINE _XMDHENN3& _XMDHENN3::operator=
11628(
11629    CONST UINT Packed
11630)
11631{
11632    v = Packed;
11633    return *this;
11634}
11635
11636/****************************************************************************
11637 *
11638 * XMDHEN3 operators
11639 *
11640 ****************************************************************************/
11641
11642//------------------------------------------------------------------------------
11643
11644XMFINLINE _XMDHEN3::_XMDHEN3
11645(
11646    FLOAT _x,
11647    FLOAT _y,
11648    FLOAT _z
11649)
11650{
11651    XMStoreDHen3(this, XMVectorSet(_x, _y, _z, 0.0f));
11652}
11653
11654//------------------------------------------------------------------------------
11655
11656XMFINLINE _XMDHEN3::_XMDHEN3
11657(
11658    CONST FLOAT* pArray
11659)
11660{
11661    XMStoreDHen3(this, XMLoadFloat3((XMFLOAT3*)pArray));
11662}
11663
11664//------------------------------------------------------------------------------
11665
11666XMFINLINE _XMDHEN3& _XMDHEN3::operator=
11667(
11668    CONST _XMDHEN3& DHen3
11669)
11670{
11671    v = DHen3.v;
11672    return *this;
11673}
11674
11675//------------------------------------------------------------------------------
11676
11677XMFINLINE _XMDHEN3& _XMDHEN3::operator=
11678(
11679    CONST UINT Packed
11680)
11681{
11682    v = Packed;
11683    return *this;
11684}
11685
11686/****************************************************************************
11687 *
11688 * XMUDHENN3 operators
11689 *
11690 ****************************************************************************/
11691
11692//------------------------------------------------------------------------------
11693
11694XMFINLINE _XMUDHENN3::_XMUDHENN3
11695(
11696    FLOAT _x,
11697    FLOAT _y,
11698    FLOAT _z
11699)
11700{
11701    XMStoreUDHenN3(this, XMVectorSet(_x, _y, _z, 0.0f));
11702}
11703
11704//------------------------------------------------------------------------------
11705
11706XMFINLINE _XMUDHENN3::_XMUDHENN3
11707(
11708    CONST FLOAT* pArray
11709)
11710{
11711    XMStoreUDHenN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
11712}
11713
11714//------------------------------------------------------------------------------
11715
11716XMFINLINE _XMUDHENN3& _XMUDHENN3::operator=
11717(
11718    CONST _XMUDHENN3& UDHenN3
11719)
11720{
11721    v = UDHenN3.v;
11722    return *this;
11723}
11724
11725//------------------------------------------------------------------------------
11726
11727XMFINLINE _XMUDHENN3& _XMUDHENN3::operator=
11728(
11729    CONST UINT Packed
11730)
11731{
11732    v = Packed;
11733    return *this;
11734}
11735
11736/****************************************************************************
11737 *
11738 * XMUDHEN3 operators
11739 *
11740 ****************************************************************************/
11741
11742//------------------------------------------------------------------------------
11743
11744XMFINLINE _XMUDHEN3::_XMUDHEN3
11745(
11746    FLOAT _x,
11747    FLOAT _y,
11748    FLOAT _z
11749)
11750{
11751    XMStoreUDHen3(this, XMVectorSet(_x, _y, _z, 0.0f));
11752}
11753
11754//------------------------------------------------------------------------------
11755
11756XMFINLINE _XMUDHEN3::_XMUDHEN3
11757(
11758    CONST FLOAT* pArray
11759)
11760{
11761    XMStoreUDHen3(this, XMLoadFloat3((XMFLOAT3*)pArray));
11762}
11763
11764//------------------------------------------------------------------------------
11765
11766XMFINLINE _XMUDHEN3& _XMUDHEN3::operator=
11767(
11768    CONST _XMUDHEN3& UDHen3
11769)
11770{
11771    v = UDHen3.v;
11772    return *this;
11773}
11774
11775//------------------------------------------------------------------------------
11776
11777XMFINLINE _XMUDHEN3& _XMUDHEN3::operator=
11778(
11779    CONST UINT Packed
11780)
11781{
11782    v = Packed;
11783    return *this;
11784}
11785
11786/****************************************************************************
11787 *
11788 * XMU565 operators
11789 *
11790 ****************************************************************************/
11791
11792XMFINLINE _XMU565::_XMU565
11793(
11794    CONST CHAR *pArray
11795)
11796{
11797    x = pArray[0];
11798    y = pArray[1];
11799    z = pArray[2];
11800}
11801
11802XMFINLINE _XMU565::_XMU565
11803(
11804    FLOAT _x,
11805    FLOAT _y,
11806    FLOAT _z
11807)
11808{
11809    XMStoreU565(this, XMVectorSet( _x, _y, _z, 0.0f ));
11810}
11811
11812XMFINLINE _XMU565::_XMU565
11813(
11814    CONST FLOAT *pArray
11815)
11816{
11817    XMStoreU565(this, XMLoadFloat3((XMFLOAT3*)pArray ));
11818}
11819
11820XMFINLINE _XMU565& _XMU565::operator=
11821(
11822    CONST _XMU565& U565
11823)
11824{
11825    v = U565.v;
11826    return *this;
11827}
11828
11829XMFINLINE _XMU565& _XMU565::operator=
11830(
11831    CONST USHORT Packed
11832)
11833{
11834    v = Packed;
11835    return *this;
11836}
11837
11838/****************************************************************************
11839 *
11840 * XMFLOAT3PK operators
11841 *
11842 ****************************************************************************/
11843
11844XMFINLINE _XMFLOAT3PK::_XMFLOAT3PK
11845(
11846    FLOAT _x,
11847    FLOAT _y,
11848    FLOAT _z
11849)
11850{
11851    XMStoreFloat3PK(this, XMVectorSet( _x, _y, _z, 0.0f ));
11852}
11853
11854XMFINLINE _XMFLOAT3PK::_XMFLOAT3PK
11855(
11856    CONST FLOAT *pArray
11857)
11858{
11859    XMStoreFloat3PK(this, XMLoadFloat3((XMFLOAT3*)pArray ));
11860}
11861
11862XMFINLINE _XMFLOAT3PK& _XMFLOAT3PK::operator=
11863(
11864    CONST _XMFLOAT3PK& float3pk
11865)
11866{
11867    v = float3pk.v;
11868    return *this;
11869}
11870
11871XMFINLINE _XMFLOAT3PK& _XMFLOAT3PK::operator=
11872(
11873    CONST UINT Packed
11874)
11875{
11876    v = Packed;
11877    return *this;
11878}
11879
11880/****************************************************************************
11881 *
11882 * XMFLOAT3SE operators
11883 *
11884 ****************************************************************************/
11885
11886XMFINLINE _XMFLOAT3SE::_XMFLOAT3SE
11887(
11888    FLOAT _x,
11889    FLOAT _y,
11890    FLOAT _z
11891)
11892{
11893    XMStoreFloat3SE(this, XMVectorSet( _x, _y, _z, 0.0f ));
11894}
11895
11896XMFINLINE _XMFLOAT3SE::_XMFLOAT3SE
11897(
11898    CONST FLOAT *pArray
11899)
11900{
11901    XMStoreFloat3SE(this, XMLoadFloat3((XMFLOAT3*)pArray ));
11902}
11903
11904XMFINLINE _XMFLOAT3SE& _XMFLOAT3SE::operator=
11905(
11906    CONST _XMFLOAT3SE& float3se
11907)
11908{
11909    v = float3se.v;
11910    return *this;
11911}
11912
11913XMFINLINE _XMFLOAT3SE& _XMFLOAT3SE::operator=
11914(
11915    CONST UINT Packed
11916)
11917{
11918    v = Packed;
11919    return *this;
11920}
11921
11922/****************************************************************************
11923 *
11924 * XMFLOAT4 operators
11925 *
11926 ****************************************************************************/
11927
11928//------------------------------------------------------------------------------
11929
11930XMFINLINE _XMFLOAT4::_XMFLOAT4
11931(
11932    CONST FLOAT* pArray
11933)
11934{
11935    x = pArray[0];
11936    y = pArray[1];
11937    z = pArray[2];
11938    w = pArray[3];
11939}
11940
11941//------------------------------------------------------------------------------
11942
11943XMFINLINE _XMFLOAT4& _XMFLOAT4::operator=
11944(
11945    CONST _XMFLOAT4& Float4
11946)
11947{
11948    x = Float4.x;
11949    y = Float4.y;
11950    z = Float4.z;
11951    w = Float4.w;
11952    return *this;
11953}
11954
11955//------------------------------------------------------------------------------
11956
11957XMFINLINE XMFLOAT4A& XMFLOAT4A::operator=
11958(
11959    CONST XMFLOAT4A& Float4
11960)
11961{
11962    x = Float4.x;
11963    y = Float4.y;
11964    z = Float4.z;
11965    w = Float4.w;
11966    return *this;
11967}
11968
11969/****************************************************************************
11970 *
11971 * XMHALF4 operators
11972 *
11973 ****************************************************************************/
11974
11975//------------------------------------------------------------------------------
11976
11977XMFINLINE _XMHALF4::_XMHALF4
11978(
11979    CONST HALF* pArray
11980)
11981{
11982    x = pArray[0];
11983    y = pArray[1];
11984    z = pArray[2];
11985    w = pArray[3];
11986}
11987
11988//------------------------------------------------------------------------------
11989
11990XMFINLINE _XMHALF4::_XMHALF4
11991(
11992    FLOAT _x,
11993    FLOAT _y,
11994    FLOAT _z,
11995    FLOAT _w
11996)
11997{
11998    x = XMConvertFloatToHalf(_x);
11999    y = XMConvertFloatToHalf(_y);
12000    z = XMConvertFloatToHalf(_z);
12001    w = XMConvertFloatToHalf(_w);
12002}
12003
12004//------------------------------------------------------------------------------
12005
12006XMFINLINE _XMHALF4::_XMHALF4
12007(
12008    CONST FLOAT* pArray
12009)
12010{
12011    XMConvertFloatToHalfStream(&x, sizeof(HALF), pArray, sizeof(FLOAT), 4);
12012}
12013
12014//------------------------------------------------------------------------------
12015
12016XMFINLINE _XMHALF4& _XMHALF4::operator=
12017(
12018    CONST _XMHALF4& Half4
12019)
12020{
12021    x = Half4.x;
12022    y = Half4.y;
12023    z = Half4.z;
12024    w = Half4.w;
12025    return *this;
12026}
12027
12028/****************************************************************************
12029 *
12030 * XMSHORTN4 operators
12031 *
12032 ****************************************************************************/
12033
12034//------------------------------------------------------------------------------
12035
12036XMFINLINE _XMSHORTN4::_XMSHORTN4
12037(
12038    CONST SHORT* pArray
12039)
12040{
12041    x = pArray[0];
12042    y = pArray[1];
12043    z = pArray[2];
12044    w = pArray[3];
12045}
12046
12047//------------------------------------------------------------------------------
12048
12049XMFINLINE _XMSHORTN4::_XMSHORTN4
12050(
12051    FLOAT _x,
12052    FLOAT _y,
12053    FLOAT _z,
12054    FLOAT _w
12055)
12056{
12057    XMStoreShortN4(this, XMVectorSet(_x, _y, _z, _w));
12058}
12059
12060//------------------------------------------------------------------------------
12061
12062XMFINLINE _XMSHORTN4::_XMSHORTN4
12063(
12064    CONST FLOAT* pArray
12065)
12066{
12067    XMStoreShortN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
12068}
12069
12070//------------------------------------------------------------------------------
12071
12072XMFINLINE _XMSHORTN4& _XMSHORTN4::operator=
12073(
12074    CONST _XMSHORTN4& ShortN4
12075)
12076{
12077    x = ShortN4.x;
12078    y = ShortN4.y;
12079    z = ShortN4.z;
12080    w = ShortN4.w;
12081    return *this;
12082}
12083
12084/****************************************************************************
12085 *
12086 * XMSHORT4 operators
12087 *
12088 ****************************************************************************/
12089
12090//------------------------------------------------------------------------------
12091
12092XMFINLINE _XMSHORT4::_XMSHORT4
12093(
12094    CONST SHORT* pArray
12095)
12096{
12097    x = pArray[0];
12098    y = pArray[1];
12099    z = pArray[2];
12100    w = pArray[3];
12101}
12102
12103//------------------------------------------------------------------------------
12104
12105XMFINLINE _XMSHORT4::_XMSHORT4
12106(
12107    FLOAT _x,
12108    FLOAT _y,
12109    FLOAT _z,
12110    FLOAT _w
12111)
12112{
12113    XMStoreShort4(this, XMVectorSet(_x, _y, _z, _w));
12114}
12115
12116//------------------------------------------------------------------------------
12117
12118XMFINLINE _XMSHORT4::_XMSHORT4
12119(
12120    CONST FLOAT* pArray
12121)
12122{
12123    XMStoreShort4(this, XMLoadFloat4((XMFLOAT4*)pArray));
12124}
12125
12126//------------------------------------------------------------------------------
12127
12128XMFINLINE _XMSHORT4& _XMSHORT4::operator=
12129(
12130    CONST _XMSHORT4& Short4
12131)
12132{
12133    x = Short4.x;
12134    y = Short4.y;
12135    z = Short4.z;
12136    w = Short4.w;
12137    return *this;
12138}
12139
12140/****************************************************************************
12141 *
12142 * XMUSHORTN4 operators
12143 *
12144 ****************************************************************************/
12145
12146//------------------------------------------------------------------------------
12147
12148XMFINLINE _XMUSHORTN4::_XMUSHORTN4
12149(
12150    CONST USHORT* pArray
12151)
12152{
12153    x = pArray[0];
12154    y = pArray[1];
12155    z = pArray[2];
12156    w = pArray[3];
12157}
12158
12159//------------------------------------------------------------------------------
12160
12161XMFINLINE _XMUSHORTN4::_XMUSHORTN4
12162(
12163    FLOAT _x,
12164    FLOAT _y,
12165    FLOAT _z,
12166    FLOAT _w
12167)
12168{
12169    XMStoreUShortN4(this, XMVectorSet(_x, _y, _z, _w));
12170}
12171
12172//------------------------------------------------------------------------------
12173
12174XMFINLINE _XMUSHORTN4::_XMUSHORTN4
12175(
12176    CONST FLOAT* pArray
12177)
12178{
12179    XMStoreUShortN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
12180}
12181
12182//------------------------------------------------------------------------------
12183
12184XMFINLINE _XMUSHORTN4& _XMUSHORTN4::operator=
12185(
12186    CONST _XMUSHORTN4& UShortN4
12187)
12188{
12189    x = UShortN4.x;
12190    y = UShortN4.y;
12191    z = UShortN4.z;
12192    w = UShortN4.w;
12193    return *this;
12194}
12195
12196/****************************************************************************
12197 *
12198 * XMUSHORT4 operators
12199 *
12200 ****************************************************************************/
12201
12202//------------------------------------------------------------------------------
12203
12204XMFINLINE _XMUSHORT4::_XMUSHORT4
12205(
12206    CONST USHORT* pArray
12207)
12208{
12209    x = pArray[0];
12210    y = pArray[1];
12211    z = pArray[2];
12212    w = pArray[3];
12213}
12214
12215//------------------------------------------------------------------------------
12216
12217XMFINLINE _XMUSHORT4::_XMUSHORT4
12218(
12219    FLOAT _x,
12220    FLOAT _y,
12221    FLOAT _z,
12222    FLOAT _w
12223)
12224{
12225    XMStoreUShort4(this, XMVectorSet(_x, _y, _z, _w));
12226}
12227
12228//------------------------------------------------------------------------------
12229
12230XMFINLINE _XMUSHORT4::_XMUSHORT4
12231(
12232    CONST FLOAT* pArray
12233)
12234{
12235    XMStoreUShort4(this, XMLoadFloat4((XMFLOAT4*)pArray));
12236}
12237
12238//------------------------------------------------------------------------------
12239
12240XMFINLINE _XMUSHORT4& _XMUSHORT4::operator=
12241(
12242    CONST _XMUSHORT4& UShort4
12243)
12244{
12245    x = UShort4.x;
12246    y = UShort4.y;
12247    z = UShort4.z;
12248    w = UShort4.w;
12249    return *this;
12250}
12251
12252/****************************************************************************
12253 *
12254 * XMXDECN4 operators
12255 *
12256 ****************************************************************************/
12257
12258//------------------------------------------------------------------------------
12259
12260XMFINLINE _XMXDECN4::_XMXDECN4
12261(
12262    FLOAT _x,
12263    FLOAT _y,
12264    FLOAT _z,
12265    FLOAT _w
12266)
12267{
12268    XMStoreXDecN4(this, XMVectorSet(_x, _y, _z, _w));
12269}
12270
12271//------------------------------------------------------------------------------
12272
12273XMFINLINE _XMXDECN4::_XMXDECN4
12274(
12275    CONST FLOAT* pArray
12276)
12277{
12278    XMStoreXDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
12279}
12280
12281//------------------------------------------------------------------------------
12282
12283XMFINLINE _XMXDECN4& _XMXDECN4::operator=
12284(
12285    CONST _XMXDECN4& XDecN4
12286)
12287{
12288    v = XDecN4.v;
12289    return *this;
12290}
12291
12292//------------------------------------------------------------------------------
12293
12294XMFINLINE _XMXDECN4& _XMXDECN4::operator=
12295(
12296    CONST UINT Packed
12297)
12298{
12299    v = Packed;
12300    return *this;
12301}
12302
12303/****************************************************************************
12304 *
12305 * XMXDEC4 operators
12306 *
12307 ****************************************************************************/
12308
12309//------------------------------------------------------------------------------
12310
12311XMFINLINE _XMXDEC4::_XMXDEC4
12312(
12313    FLOAT _x,
12314    FLOAT _y,
12315    FLOAT _z,
12316    FLOAT _w
12317)
12318{
12319    XMStoreXDec4(this, XMVectorSet(_x, _y, _z, _w));
12320}
12321
12322//------------------------------------------------------------------------------
12323
12324XMFINLINE _XMXDEC4::_XMXDEC4
12325(
12326    CONST FLOAT* pArray
12327)
12328{
12329    XMStoreXDec4(this, XMLoadFloat4((XMFLOAT4*)pArray));
12330}
12331
12332//------------------------------------------------------------------------------
12333
12334XMFINLINE _XMXDEC4& _XMXDEC4::operator=
12335(
12336    CONST _XMXDEC4& XDec4
12337)
12338{
12339    v = XDec4.v;
12340    return *this;
12341}
12342
12343//------------------------------------------------------------------------------
12344
12345XMFINLINE _XMXDEC4& _XMXDEC4::operator=
12346(
12347    CONST UINT Packed
12348)
12349{
12350    v = Packed;
12351    return *this;
12352}
12353
12354/****************************************************************************
12355 *
12356 * XMDECN4 operators
12357 *
12358 ****************************************************************************/
12359
12360//------------------------------------------------------------------------------
12361
12362XMFINLINE _XMDECN4::_XMDECN4
12363(
12364    FLOAT _x,
12365    FLOAT _y,
12366    FLOAT _z,
12367    FLOAT _w
12368)
12369{
12370    XMStoreDecN4(this, XMVectorSet(_x, _y, _z, _w));
12371}
12372
12373//------------------------------------------------------------------------------
12374
12375XMFINLINE _XMDECN4::_XMDECN4
12376(
12377    CONST FLOAT* pArray
12378)
12379{
12380    XMStoreDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
12381}
12382
12383//------------------------------------------------------------------------------
12384
12385XMFINLINE _XMDECN4& _XMDECN4::operator=
12386(
12387    CONST _XMDECN4& DecN4
12388)
12389{
12390    v = DecN4.v;
12391    return *this;
12392}
12393
12394//------------------------------------------------------------------------------
12395
12396XMFINLINE _XMDECN4& _XMDECN4::operator=
12397(
12398    CONST UINT Packed
12399)
12400{
12401    v = Packed;
12402    return *this;
12403}
12404
12405/****************************************************************************
12406 *
12407 * XMDEC4 operators
12408 *
12409 ****************************************************************************/
12410
12411//------------------------------------------------------------------------------
12412
12413XMFINLINE _XMDEC4::_XMDEC4
12414(
12415    FLOAT _x,
12416    FLOAT _y,
12417    FLOAT _z,
12418    FLOAT _w
12419)
12420{
12421    XMStoreDec4(this, XMVectorSet(_x, _y, _z, _w));
12422}
12423
12424//------------------------------------------------------------------------------
12425
12426XMFINLINE _XMDEC4::_XMDEC4
12427(
12428    CONST FLOAT* pArray
12429)
12430{
12431    XMStoreDec4(this, XMLoadFloat4((XMFLOAT4*)pArray));
12432}
12433
12434//------------------------------------------------------------------------------
12435
12436XMFINLINE _XMDEC4& _XMDEC4::operator=
12437(
12438    CONST _XMDEC4& Dec4
12439)
12440{
12441    v = Dec4.v;
12442    return *this;
12443}
12444
12445//------------------------------------------------------------------------------
12446
12447XMFINLINE _XMDEC4& _XMDEC4::operator=
12448(
12449    CONST UINT Packed
12450)
12451{
12452    v = Packed;
12453    return *this;
12454}
12455
12456/****************************************************************************
12457 *
12458 * XMUDECN4 operators
12459 *
12460 ****************************************************************************/
12461
12462//------------------------------------------------------------------------------
12463
12464XMFINLINE _XMUDECN4::_XMUDECN4
12465(
12466    FLOAT _x,
12467    FLOAT _y,
12468    FLOAT _z,
12469    FLOAT _w
12470)
12471{
12472    XMStoreUDecN4(this, XMVectorSet(_x, _y, _z, _w));
12473}
12474
12475//------------------------------------------------------------------------------
12476
12477XMFINLINE _XMUDECN4::_XMUDECN4
12478(
12479    CONST FLOAT* pArray
12480)
12481{
12482    XMStoreUDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
12483}
12484
12485//------------------------------------------------------------------------------
12486
12487XMFINLINE _XMUDECN4& _XMUDECN4::operator=
12488(
12489    CONST _XMUDECN4& UDecN4
12490)
12491{
12492    v = UDecN4.v;
12493    return *this;
12494}
12495
12496//------------------------------------------------------------------------------
12497
12498XMFINLINE _XMUDECN4& _XMUDECN4::operator=
12499(
12500    CONST UINT Packed
12501)
12502{
12503    v = Packed;
12504    return *this;
12505}
12506
12507/****************************************************************************
12508 *
12509 * XMUDEC4 operators
12510 *
12511 ****************************************************************************/
12512
12513//------------------------------------------------------------------------------
12514
12515XMFINLINE _XMUDEC4::_XMUDEC4
12516(
12517    FLOAT _x,
12518    FLOAT _y,
12519    FLOAT _z,
12520    FLOAT _w
12521)
12522{
12523    XMStoreUDec4(this, XMVectorSet(_x, _y, _z, _w));
12524}
12525
12526//------------------------------------------------------------------------------
12527
12528XMFINLINE _XMUDEC4::_XMUDEC4
12529(
12530    CONST FLOAT* pArray
12531)
12532{
12533    XMStoreUDec4(this, XMLoadFloat4((XMFLOAT4*)pArray));
12534}
12535
12536//------------------------------------------------------------------------------
12537
12538XMFINLINE _XMUDEC4& _XMUDEC4::operator=
12539(
12540    CONST _XMUDEC4& UDec4
12541)
12542{
12543    v = UDec4.v;
12544    return *this;
12545}
12546
12547//------------------------------------------------------------------------------
12548
12549XMFINLINE _XMUDEC4& _XMUDEC4::operator=
12550(
12551    CONST UINT Packed
12552)
12553{
12554    v = Packed;
12555    return *this;
12556}
12557
12558/****************************************************************************
12559 *
12560 * XMXICON4 operators
12561 *
12562 ****************************************************************************/
12563
12564//------------------------------------------------------------------------------
12565
12566XMFINLINE _XMXICON4::_XMXICON4
12567(
12568    FLOAT _x,
12569    FLOAT _y,
12570    FLOAT _z,
12571    FLOAT _w
12572)
12573{
12574    XMStoreXIcoN4(this, XMVectorSet(_x, _y, _z, _w));
12575}
12576
12577//------------------------------------------------------------------------------
12578
12579XMFINLINE _XMXICON4::_XMXICON4
12580(
12581    CONST FLOAT* pArray
12582)
12583{
12584    XMStoreXIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
12585}
12586
12587//------------------------------------------------------------------------------
12588
12589XMFINLINE _XMXICON4& _XMXICON4::operator=
12590(
12591    CONST _XMXICON4& XIcoN4
12592)
12593{
12594    v = XIcoN4.v;
12595    return *this;
12596}
12597
12598//------------------------------------------------------------------------------
12599
12600XMFINLINE _XMXICON4& _XMXICON4::operator=
12601(
12602    CONST UINT64 Packed
12603)
12604{
12605    v = Packed;
12606    return *this;
12607}
12608
12609/****************************************************************************
12610 *
12611 * XMXICO4 operators
12612 *
12613 ****************************************************************************/
12614
12615//------------------------------------------------------------------------------
12616
12617XMFINLINE _XMXICO4::_XMXICO4
12618(
12619    FLOAT _x,
12620    FLOAT _y,
12621    FLOAT _z,
12622    FLOAT _w
12623)
12624{
12625    XMStoreXIco4(this, XMVectorSet(_x, _y, _z, _w));
12626}
12627
12628//------------------------------------------------------------------------------
12629
12630XMFINLINE _XMXICO4::_XMXICO4
12631(
12632    CONST FLOAT* pArray
12633)
12634{
12635    XMStoreXIco4(this, XMLoadFloat4((XMFLOAT4*)pArray));
12636}
12637
12638//------------------------------------------------------------------------------
12639
12640XMFINLINE _XMXICO4& _XMXICO4::operator=
12641(
12642    CONST _XMXICO4& XIco4
12643)
12644{
12645    v = XIco4.v;
12646    return *this;
12647}
12648
12649//------------------------------------------------------------------------------
12650
12651XMFINLINE _XMXICO4& _XMXICO4::operator=
12652(
12653    CONST UINT64 Packed
12654)
12655{
12656    v = Packed;
12657    return *this;
12658}
12659
12660/****************************************************************************
12661 *
12662 * XMICON4 operators
12663 *
12664 ****************************************************************************/
12665
12666//------------------------------------------------------------------------------
12667
12668XMFINLINE _XMICON4::_XMICON4
12669(
12670    FLOAT _x,
12671    FLOAT _y,
12672    FLOAT _z,
12673    FLOAT _w
12674)
12675{
12676    XMStoreIcoN4(this, XMVectorSet(_x, _y, _z, _w));
12677}
12678
12679//------------------------------------------------------------------------------
12680
12681XMFINLINE _XMICON4::_XMICON4
12682(
12683    CONST FLOAT* pArray
12684)
12685{
12686    XMStoreIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
12687}
12688
12689//------------------------------------------------------------------------------
12690
12691XMFINLINE _XMICON4& _XMICON4::operator=
12692(
12693    CONST _XMICON4& IcoN4
12694)
12695{
12696    v = IcoN4.v;
12697    return *this;
12698}
12699
12700//------------------------------------------------------------------------------
12701
12702XMFINLINE _XMICON4& _XMICON4::operator=
12703(
12704    CONST UINT64 Packed
12705)
12706{
12707    v = Packed;
12708    return *this;
12709}
12710
12711/****************************************************************************
12712 *
12713 * XMICO4 operators
12714 *
12715 ****************************************************************************/
12716
12717//------------------------------------------------------------------------------
12718
12719XMFINLINE _XMICO4::_XMICO4
12720(
12721    FLOAT _x,
12722    FLOAT _y,
12723    FLOAT _z,
12724    FLOAT _w
12725)
12726{
12727    XMStoreIco4(this, XMVectorSet(_x, _y, _z, _w));
12728}
12729
12730//------------------------------------------------------------------------------
12731
12732XMFINLINE _XMICO4::_XMICO4
12733(
12734    CONST FLOAT* pArray
12735)
12736{
12737    XMStoreIco4(this, XMLoadFloat4((XMFLOAT4*)pArray));
12738}
12739
12740//------------------------------------------------------------------------------
12741
12742XMFINLINE _XMICO4& _XMICO4::operator=
12743(
12744    CONST _XMICO4& Ico4
12745)
12746{
12747    v = Ico4.v;
12748    return *this;
12749}
12750
12751//------------------------------------------------------------------------------
12752
12753XMFINLINE _XMICO4& _XMICO4::operator=
12754(
12755    CONST UINT64 Packed
12756)
12757{
12758    v = Packed;
12759    return *this;
12760}
12761
12762/****************************************************************************
12763 *
12764 * XMUICON4 operators
12765 *
12766 ****************************************************************************/
12767
12768//------------------------------------------------------------------------------
12769
12770XMFINLINE _XMUICON4::_XMUICON4
12771(
12772    FLOAT _x,
12773    FLOAT _y,
12774    FLOAT _z,
12775    FLOAT _w
12776)
12777{
12778    XMStoreUIcoN4(this, XMVectorSet(_x, _y, _z, _w));
12779}
12780
12781//------------------------------------------------------------------------------
12782
12783XMFINLINE _XMUICON4::_XMUICON4
12784(
12785    CONST FLOAT* pArray
12786)
12787{
12788    XMStoreUIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
12789}
12790
12791//------------------------------------------------------------------------------
12792
12793XMFINLINE _XMUICON4& _XMUICON4::operator=
12794(
12795    CONST _XMUICON4& UIcoN4
12796)
12797{
12798    v = UIcoN4.v;
12799    return *this;
12800}
12801
12802//------------------------------------------------------------------------------
12803
12804XMFINLINE _XMUICON4& _XMUICON4::operator=
12805(
12806    CONST UINT64 Packed
12807)
12808{
12809    v = Packed;
12810    return *this;
12811}
12812
12813/****************************************************************************
12814 *
12815 * XMUICO4 operators
12816 *
12817 ****************************************************************************/
12818
12819//------------------------------------------------------------------------------
12820
12821XMFINLINE _XMUICO4::_XMUICO4
12822(
12823    FLOAT _x,
12824    FLOAT _y,
12825    FLOAT _z,
12826    FLOAT _w
12827)
12828{
12829    XMStoreUIco4(this, XMVectorSet(_x, _y, _z, _w));
12830}
12831
12832//------------------------------------------------------------------------------
12833
12834XMFINLINE _XMUICO4::_XMUICO4
12835(
12836    CONST FLOAT* pArray
12837)
12838{
12839    XMStoreUIco4(this, XMLoadFloat4((XMFLOAT4*)pArray));
12840}
12841
12842//------------------------------------------------------------------------------
12843
12844XMFINLINE _XMUICO4& _XMUICO4::operator=
12845(
12846    CONST _XMUICO4& UIco4
12847)
12848{
12849    v = UIco4.v;
12850    return *this;
12851}
12852
12853//------------------------------------------------------------------------------
12854
12855XMFINLINE _XMUICO4& _XMUICO4::operator=
12856(
12857    CONST UINT64 Packed
12858)
12859{
12860    v = Packed;
12861    return *this;
12862}
12863
12864/****************************************************************************
12865 *
12866 * XMCOLOR4 operators
12867 *
12868 ****************************************************************************/
12869
12870//------------------------------------------------------------------------------
12871
12872XMFINLINE _XMCOLOR::_XMCOLOR
12873(
12874    FLOAT _r,
12875    FLOAT _g,
12876    FLOAT _b,
12877    FLOAT _a
12878)
12879{
12880    XMStoreColor(this, XMVectorSet(_r, _g, _b, _a));
12881}
12882
12883//------------------------------------------------------------------------------
12884
12885XMFINLINE _XMCOLOR::_XMCOLOR
12886(
12887    CONST FLOAT* pArray
12888)
12889{
12890    XMStoreColor(this, XMLoadFloat4((XMFLOAT4*)pArray));
12891}
12892
12893//------------------------------------------------------------------------------
12894
12895XMFINLINE _XMCOLOR& _XMCOLOR::operator=
12896(
12897    CONST _XMCOLOR& Color
12898)
12899{
12900    c = Color.c;
12901    return *this;
12902}
12903
12904//------------------------------------------------------------------------------
12905
12906XMFINLINE _XMCOLOR& _XMCOLOR::operator=
12907(
12908    CONST UINT Color
12909)
12910{
12911    c = Color;
12912    return *this;
12913}
12914
12915/****************************************************************************
12916 *
12917 * XMBYTEN4 operators
12918 *
12919 ****************************************************************************/
12920
12921//------------------------------------------------------------------------------
12922
12923XMFINLINE _XMBYTEN4::_XMBYTEN4
12924(
12925    CONST CHAR* pArray
12926)
12927{
12928    x = pArray[0];
12929    y = pArray[1];
12930    z = pArray[2];
12931    w = pArray[3];
12932}
12933
12934//------------------------------------------------------------------------------
12935
12936XMFINLINE _XMBYTEN4::_XMBYTEN4
12937(
12938    FLOAT _x,
12939    FLOAT _y,
12940    FLOAT _z,
12941    FLOAT _w
12942)
12943{
12944    XMStoreByteN4(this, XMVectorSet(_x, _y, _z, _w));
12945}
12946
12947//------------------------------------------------------------------------------
12948
12949XMFINLINE _XMBYTEN4::_XMBYTEN4
12950(
12951    CONST FLOAT* pArray
12952)
12953{
12954    XMStoreByteN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
12955}
12956
12957//------------------------------------------------------------------------------
12958
12959XMFINLINE _XMBYTEN4& _XMBYTEN4::operator=
12960(
12961    CONST _XMBYTEN4& ByteN4
12962)
12963{
12964    x = ByteN4.x;
12965    y = ByteN4.y;
12966    z = ByteN4.z;
12967    w = ByteN4.w;
12968    return *this;
12969}
12970
12971/****************************************************************************
12972 *
12973 * XMBYTE4 operators
12974 *
12975 ****************************************************************************/
12976
12977//------------------------------------------------------------------------------
12978
12979XMFINLINE _XMBYTE4::_XMBYTE4
12980(
12981    CONST CHAR* pArray
12982)
12983{
12984    x = pArray[0];
12985    y = pArray[1];
12986    z = pArray[2];
12987    w = pArray[3];
12988}
12989
12990//------------------------------------------------------------------------------
12991
12992XMFINLINE _XMBYTE4::_XMBYTE4
12993(
12994    FLOAT _x,
12995    FLOAT _y,
12996    FLOAT _z,
12997    FLOAT _w
12998)
12999{
13000    XMStoreByte4(this, XMVectorSet(_x, _y, _z, _w));
13001}
13002
13003//------------------------------------------------------------------------------
13004
13005XMFINLINE _XMBYTE4::_XMBYTE4
13006(
13007    CONST FLOAT* pArray
13008)
13009{
13010    XMStoreByte4(this, XMLoadFloat4((XMFLOAT4*)pArray));
13011}
13012
13013//------------------------------------------------------------------------------
13014
13015XMFINLINE _XMBYTE4& _XMBYTE4::operator=
13016(
13017    CONST _XMBYTE4& Byte4
13018)
13019{
13020    x = Byte4.x;
13021    y = Byte4.y;
13022    z = Byte4.z;
13023    w = Byte4.w;
13024    return *this;
13025}
13026
13027/****************************************************************************
13028 *
13029 * XMUBYTEN4 operators
13030 *
13031 ****************************************************************************/
13032
13033//------------------------------------------------------------------------------
13034
13035XMFINLINE _XMUBYTEN4::_XMUBYTEN4
13036(
13037    CONST BYTE* pArray
13038)
13039{
13040    x = pArray[0];
13041    y = pArray[1];
13042    z = pArray[2];
13043    w = pArray[3];
13044}
13045
13046//------------------------------------------------------------------------------
13047
13048XMFINLINE _XMUBYTEN4::_XMUBYTEN4
13049(
13050    FLOAT _x,
13051    FLOAT _y,
13052    FLOAT _z,
13053    FLOAT _w
13054)
13055{
13056    XMStoreUByteN4(this, XMVectorSet(_x, _y, _z, _w));
13057}
13058
13059//------------------------------------------------------------------------------
13060
13061XMFINLINE _XMUBYTEN4::_XMUBYTEN4
13062(
13063    CONST FLOAT* pArray
13064)
13065{
13066    XMStoreUByteN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
13067}
13068
13069//------------------------------------------------------------------------------
13070
13071XMFINLINE _XMUBYTEN4& _XMUBYTEN4::operator=
13072(
13073    CONST _XMUBYTEN4& UByteN4
13074)
13075{
13076    x = UByteN4.x;
13077    y = UByteN4.y;
13078    z = UByteN4.z;
13079    w = UByteN4.w;
13080    return *this;
13081}
13082
13083/****************************************************************************
13084 *
13085 * XMUBYTE4 operators
13086 *
13087 ****************************************************************************/
13088
13089//------------------------------------------------------------------------------
13090
13091XMFINLINE _XMUBYTE4::_XMUBYTE4
13092(
13093    CONST BYTE* pArray
13094)
13095{
13096    x = pArray[0];
13097    y = pArray[1];
13098    z = pArray[2];
13099    w = pArray[3];
13100}
13101
13102//------------------------------------------------------------------------------
13103
13104XMFINLINE _XMUBYTE4::_XMUBYTE4
13105(
13106    FLOAT _x,
13107    FLOAT _y,
13108    FLOAT _z,
13109    FLOAT _w
13110)
13111{
13112    XMStoreUByte4(this, XMVectorSet(_x, _y, _z, _w));
13113}
13114
13115//------------------------------------------------------------------------------
13116
13117XMFINLINE _XMUBYTE4::_XMUBYTE4
13118(
13119    CONST FLOAT* pArray
13120)
13121{
13122    XMStoreUByte4(this, XMLoadFloat4((XMFLOAT4*)pArray));
13123}
13124
13125//------------------------------------------------------------------------------
13126
13127XMFINLINE _XMUBYTE4& _XMUBYTE4::operator=
13128(
13129    CONST _XMUBYTE4& UByte4
13130)
13131{
13132    x = UByte4.x;
13133    y = UByte4.y;
13134    z = UByte4.z;
13135    w = UByte4.w;
13136    return *this;
13137}
13138
13139/****************************************************************************
13140 *
13141 * XMUNIBBLE4 operators
13142 *
13143 ****************************************************************************/
13144
13145//------------------------------------------------------------------------------
13146
13147XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4
13148(
13149    CONST CHAR *pArray
13150)
13151{
13152    x = pArray[0];
13153    y = pArray[1];
13154    z = pArray[2];
13155    w = pArray[3];
13156}
13157
13158//------------------------------------------------------------------------------
13159
13160XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4
13161(
13162    FLOAT _x,
13163    FLOAT _y,
13164    FLOAT _z,
13165    FLOAT _w
13166)
13167{
13168    XMStoreUNibble4(this, XMVectorSet( _x, _y, _z, _w ));
13169}
13170
13171//------------------------------------------------------------------------------
13172
13173XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4
13174(
13175    CONST FLOAT *pArray
13176)
13177{
13178    XMStoreUNibble4(this, XMLoadFloat4((XMFLOAT4*)pArray));
13179}
13180
13181//------------------------------------------------------------------------------
13182
13183XMFINLINE _XMUNIBBLE4& _XMUNIBBLE4::operator=
13184(
13185    CONST _XMUNIBBLE4& UNibble4
13186)
13187{
13188    v = UNibble4.v;
13189    return *this;
13190}
13191
13192//------------------------------------------------------------------------------
13193
13194XMFINLINE _XMUNIBBLE4& _XMUNIBBLE4::operator=
13195(
13196    CONST USHORT Packed
13197)
13198{
13199    v = Packed;
13200    return *this;
13201}
13202
13203/****************************************************************************
13204 *
13205 * XMU555 operators
13206 *
13207 ****************************************************************************/
13208
13209//------------------------------------------------------------------------------
13210
13211XMFINLINE _XMU555::_XMU555
13212(
13213    CONST CHAR *pArray,
13214    BOOL _w
13215)
13216{
13217    x = pArray[0];
13218    y = pArray[1];
13219    z = pArray[2];
13220    w = _w;
13221}
13222
13223//------------------------------------------------------------------------------
13224
13225XMFINLINE _XMU555::_XMU555
13226(
13227    FLOAT _x,
13228    FLOAT _y,
13229    FLOAT _z,
13230    BOOL _w
13231)
13232{
13233    XMStoreU555(this, XMVectorSet(_x, _y, _z, ((_w) ? 1.0f : 0.0f) ));
13234}
13235
13236//------------------------------------------------------------------------------
13237
13238XMFINLINE _XMU555::_XMU555
13239(
13240    CONST FLOAT *pArray,
13241    BOOL _w
13242)
13243{
13244    XMVECTOR V = XMLoadFloat3((XMFLOAT3*)pArray);
13245    XMStoreU555(this, XMVectorSetW(V, ((_w) ? 1.0f : 0.0f) ));
13246}
13247
13248//------------------------------------------------------------------------------
13249
13250XMFINLINE _XMU555& _XMU555::operator=
13251(
13252    CONST _XMU555& U555
13253)
13254{
13255    v = U555.v;
13256    return *this;
13257}
13258
13259//------------------------------------------------------------------------------
13260
13261XMFINLINE _XMU555& _XMU555::operator=
13262(
13263    CONST USHORT Packed
13264)
13265{
13266    v = Packed;
13267    return *this;
13268}
13269
13270#endif // __cplusplus
13271
13272#if defined(_XM_NO_INTRINSICS_)
13273#undef XMISNAN
13274#undef XMISINF
13275#endif
13276
13277#endif // __XNAMATHVECTOR_INL__
13278