1 /*
2    Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
3    All rights reserved.
4 
5    Redistribution and use in source and binary forms,
6    with or without modification, are permitted provided that the
7    following conditions are met:
8     * Redistributions of source code must retain the above copyright
9       notice, this list of conditions and the following disclaimer.
10     * Redistributions in binary form must reproduce the above copyright
11       notice, this list of conditions and the following disclaimer in the
12       documentation and/or other materials provided with the distribution.
13     * Neither the name of the Sony Computer Entertainment Inc nor the names
14       of its contributors may be used to endorse or promote products derived
15       from this software without specific prior written permission.
16 
17    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27    POSSIBILITY OF SUCH DAMAGE.
28 */
29 
30 #ifndef _VECTORMATH_VEC_AOS_CPP_H
31 #define _VECTORMATH_VEC_AOS_CPP_H
32 
33 //-----------------------------------------------------------------------------
34 // Constants
35 // for permutes words are labeled [x,y,z,w] [a,b,c,d]
36 
37 #define _VECTORMATH_PERM_X 0x00010203
38 #define _VECTORMATH_PERM_Y 0x04050607
39 #define _VECTORMATH_PERM_Z 0x08090a0b
40 #define _VECTORMATH_PERM_W 0x0c0d0e0f
41 #define _VECTORMATH_PERM_A 0x10111213
42 #define _VECTORMATH_PERM_B 0x14151617
43 #define _VECTORMATH_PERM_C 0x18191a1b
44 #define _VECTORMATH_PERM_D 0x1c1d1e1f
45 #define _VECTORMATH_PERM_XYZA \
46 	(vec_uchar16)(vec_uint4) { _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A }
47 #define _VECTORMATH_PERM_ZXYW \
48 	(vec_uchar16)(vec_uint4) { _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
49 #define _VECTORMATH_PERM_YZXW \
50 	(vec_uchar16)(vec_uint4) { _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
51 #define _VECTORMATH_PERM_YZAB \
52 	(vec_uchar16)(vec_uint4) { _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
53 #define _VECTORMATH_PERM_ZABC \
54 	(vec_uchar16)(vec_uint4) { _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
55 #define _VECTORMATH_PERM_XYAW \
56 	(vec_uchar16)(vec_uint4) { _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
57 #define _VECTORMATH_PERM_XAZW \
58 	(vec_uchar16)(vec_uint4) { _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
59 #define _VECTORMATH_MASK_0xF000 \
60 	(vec_uint4) { 0xffffffff, 0, 0, 0 }
61 #define _VECTORMATH_MASK_0x0F00 \
62 	(vec_uint4) { 0, 0xffffffff, 0, 0 }
63 #define _VECTORMATH_MASK_0x00F0 \
64 	(vec_uint4) { 0, 0, 0xffffffff, 0 }
65 #define _VECTORMATH_MASK_0x000F \
66 	(vec_uint4) { 0, 0, 0, 0xffffffff }
67 #define _VECTORMATH_UNIT_1000 _mm_setr_ps(1.0f, 0.0f, 0.0f, 0.0f)  // (__m128){ 1.0f, 0.0f, 0.0f, 0.0f }
68 #define _VECTORMATH_UNIT_0100 _mm_setr_ps(0.0f, 1.0f, 0.0f, 0.0f)  // (__m128){ 0.0f, 1.0f, 0.0f, 0.0f }
69 #define _VECTORMATH_UNIT_0010 _mm_setr_ps(0.0f, 0.0f, 1.0f, 0.0f)  // (__m128){ 0.0f, 0.0f, 1.0f, 0.0f }
70 #define _VECTORMATH_UNIT_0001 _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f)  // (__m128){ 0.0f, 0.0f, 0.0f, 1.0f }
71 #define _VECTORMATH_SLERP_TOL 0.999f
72 //_VECTORMATH_SLERP_TOLF
73 
74 //-----------------------------------------------------------------------------
75 // Definitions
76 
77 #ifndef _VECTORMATH_INTERNAL_FUNCTIONS
78 #define _VECTORMATH_INTERNAL_FUNCTIONS
79 
80 #define _vmath_shufps(a, b, immx, immy, immz, immw) _mm_shuffle_ps(a, b, _MM_SHUFFLE(immw, immz, immy, immx))
_vmathVfDot3(__m128 vec0,__m128 vec1)81 static VECTORMATH_FORCE_INLINE __m128 _vmathVfDot3(__m128 vec0, __m128 vec1)
82 {
83 	__m128 result = _mm_mul_ps(vec0, vec1);
84 	return _mm_add_ps(vec_splat(result, 0), _mm_add_ps(vec_splat(result, 1), vec_splat(result, 2)));
85 }
86 
_vmathVfDot4(__m128 vec0,__m128 vec1)87 static VECTORMATH_FORCE_INLINE __m128 _vmathVfDot4(__m128 vec0, __m128 vec1)
88 {
89 	__m128 result = _mm_mul_ps(vec0, vec1);
90 	return _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(0, 0, 0, 0)),
91 					  _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(1, 1, 1, 1)),
92 								 _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(2, 2, 2, 2)), _mm_shuffle_ps(result, result, _MM_SHUFFLE(3, 3, 3, 3)))));
93 }
94 
_vmathVfCross(__m128 vec0,__m128 vec1)95 static VECTORMATH_FORCE_INLINE __m128 _vmathVfCross(__m128 vec0, __m128 vec1)
96 {
97 	__m128 tmp0, tmp1, tmp2, tmp3, result;
98 	tmp0 = _mm_shuffle_ps(vec0, vec0, _MM_SHUFFLE(3, 0, 2, 1));
99 	tmp1 = _mm_shuffle_ps(vec1, vec1, _MM_SHUFFLE(3, 1, 0, 2));
100 	tmp2 = _mm_shuffle_ps(vec0, vec0, _MM_SHUFFLE(3, 1, 0, 2));
101 	tmp3 = _mm_shuffle_ps(vec1, vec1, _MM_SHUFFLE(3, 0, 2, 1));
102 	result = vec_mul(tmp0, tmp1);
103 	result = vec_nmsub(tmp2, tmp3, result);
104 	return result;
105 }
106 /*
107 static VECTORMATH_FORCE_INLINE vec_uint4 _vmathVfToHalfFloatsUnpacked(__m128 v)
108 {
109 #if 0
110     vec_int4 bexp;
111     vec_uint4 mant, sign, hfloat;
112     vec_uint4 notZero, isInf;
113     const vec_uint4 hfloatInf = (vec_uint4)(0x00007c00u);
114     const vec_uint4 mergeMant = (vec_uint4)(0x000003ffu);
115     const vec_uint4 mergeSign = (vec_uint4)(0x00008000u);
116 
117     sign = vec_sr((vec_uint4)v, (vec_uint4)16);
118     mant = vec_sr((vec_uint4)v, (vec_uint4)13);
119     bexp = vec_and(vec_sr((vec_int4)v, (vec_uint4)23), (vec_int4)0xff);
120 
121     notZero = (vec_uint4)vec_cmpgt(bexp, (vec_int4)112);
122     isInf = (vec_uint4)vec_cmpgt(bexp, (vec_int4)142);
123 
124     bexp = _mm_add_ps(bexp, (vec_int4)-112);
125     bexp = vec_sl(bexp, (vec_uint4)10);
126 
127     hfloat = vec_sel((vec_uint4)bexp, mant, mergeMant);
128     hfloat = vec_sel((vec_uint4)(0), hfloat, notZero);
129     hfloat = vec_sel(hfloat, hfloatInf, isInf);
130     hfloat = vec_sel(hfloat, sign, mergeSign);
131 
132     return hfloat;
133 #else
134 	assert(0);
135 	return _mm_setzero_ps();
136 #endif
137 }
138 
139 static VECTORMATH_FORCE_INLINE vec_ushort8 _vmath2VfToHalfFloats(__m128 u, __m128 v)
140 {
141 #if 0
142     vec_uint4 hfloat_u, hfloat_v;
143     const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
144     hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
145     hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
146     return (vec_ushort8)vec_perm(hfloat_u, hfloat_v, pack);
147 #else
148 	assert(0);
149 	return _mm_setzero_si128();
150 #endif
151 }
152 */
153 
_vmathVfInsert(__m128 dst,__m128 src,int slot)154 static VECTORMATH_FORCE_INLINE __m128 _vmathVfInsert(__m128 dst, __m128 src, int slot)
155 {
156 	SSEFloat s;
157 	s.m128 = src;
158 	SSEFloat d;
159 	d.m128 = dst;
160 	d.f[slot] = s.f[slot];
161 	return d.m128;
162 }
163 
164 #define _vmathVfSetElement(vec, scalar, slot) ((float *)&(vec))[slot] = scalar
165 
_vmathVfSplatScalar(float scalar)166 static VECTORMATH_FORCE_INLINE __m128 _vmathVfSplatScalar(float scalar)
167 {
168 	return _mm_set1_ps(scalar);
169 }
170 
171 #endif
172 
173 namespace Vectormath
174 {
175 namespace Aos
176 {
177 #ifdef _VECTORMATH_NO_SCALAR_CAST
floatInVec()178 VECTORMATH_FORCE_INLINE VecIdx::operator floatInVec() const
179 {
180 	return floatInVec(ref, i);
181 }
182 
getAsFloat()183 VECTORMATH_FORCE_INLINE float VecIdx::getAsFloat() const
184 #else
185 VECTORMATH_FORCE_INLINE VecIdx::operator float() const
186 #endif
187 {
188 	return ((float *)&ref)[i];
189 }
190 
191 VECTORMATH_FORCE_INLINE float VecIdx::operator=(float scalar)
192 {
193 	_vmathVfSetElement(ref, scalar, i);
194 	return scalar;
195 }
196 
197 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator=(const floatInVec &scalar)
198 {
199 	ref = _vmathVfInsert(ref, scalar.get128(), i);
200 	return scalar;
201 }
202 
203 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator=(const VecIdx &scalar)
204 {
205 	return *this = floatInVec(scalar.ref, scalar.i);
206 }
207 
208 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator*=(float scalar)
209 {
210 	return *this *= floatInVec(scalar);
211 }
212 
213 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator*=(const floatInVec &scalar)
214 {
215 	return *this = floatInVec(ref, i) * scalar;
216 }
217 
218 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator/=(float scalar)
219 {
220 	return *this /= floatInVec(scalar);
221 }
222 
223 inline floatInVec VecIdx::operator/=(const floatInVec &scalar)
224 {
225 	return *this = floatInVec(ref, i) / scalar;
226 }
227 
228 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator+=(float scalar)
229 {
230 	return *this += floatInVec(scalar);
231 }
232 
233 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator+=(const floatInVec &scalar)
234 {
235 	return *this = floatInVec(ref, i) + scalar;
236 }
237 
238 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator-=(float scalar)
239 {
240 	return *this -= floatInVec(scalar);
241 }
242 
243 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator-=(const floatInVec &scalar)
244 {
245 	return *this = floatInVec(ref, i) - scalar;
246 }
247 
Vector3(const Vector3 & vec)248 VECTORMATH_FORCE_INLINE Vector3::Vector3(const Vector3 &vec)
249 {
250 	set128(vec.get128());
251 }
252 
set128(vec_float4 vec)253 VECTORMATH_FORCE_INLINE void Vector3::set128(vec_float4 vec)
254 {
255 	mVec128 = vec;
256 }
257 
Vector3(float _x,float _y,float _z)258 VECTORMATH_FORCE_INLINE Vector3::Vector3(float _x, float _y, float _z)
259 {
260 	mVec128 = _mm_setr_ps(_x, _y, _z, 0.0f);
261 }
262 
Vector3(const floatInVec & _x,const floatInVec & _y,const floatInVec & _z)263 VECTORMATH_FORCE_INLINE Vector3::Vector3(const floatInVec &_x, const floatInVec &_y, const floatInVec &_z)
264 {
265 	__m128 xz = _mm_unpacklo_ps(_x.get128(), _z.get128());
266 	mVec128 = _mm_unpacklo_ps(xz, _y.get128());
267 }
268 
Vector3(const Point3 & pnt)269 VECTORMATH_FORCE_INLINE Vector3::Vector3(const Point3 &pnt)
270 {
271 	mVec128 = pnt.get128();
272 }
273 
Vector3(float scalar)274 VECTORMATH_FORCE_INLINE Vector3::Vector3(float scalar)
275 {
276 	mVec128 = floatInVec(scalar).get128();
277 }
278 
Vector3(const floatInVec & scalar)279 VECTORMATH_FORCE_INLINE Vector3::Vector3(const floatInVec &scalar)
280 {
281 	mVec128 = scalar.get128();
282 }
283 
Vector3(__m128 vf4)284 VECTORMATH_FORCE_INLINE Vector3::Vector3(__m128 vf4)
285 {
286 	mVec128 = vf4;
287 }
288 
xAxis()289 VECTORMATH_FORCE_INLINE const Vector3 Vector3::xAxis()
290 {
291 	return Vector3(_VECTORMATH_UNIT_1000);
292 }
293 
yAxis()294 VECTORMATH_FORCE_INLINE const Vector3 Vector3::yAxis()
295 {
296 	return Vector3(_VECTORMATH_UNIT_0100);
297 }
298 
zAxis()299 VECTORMATH_FORCE_INLINE const Vector3 Vector3::zAxis()
300 {
301 	return Vector3(_VECTORMATH_UNIT_0010);
302 }
303 
lerp(float t,const Vector3 & vec0,const Vector3 & vec1)304 VECTORMATH_FORCE_INLINE const Vector3 lerp(float t, const Vector3 &vec0, const Vector3 &vec1)
305 {
306 	return lerp(floatInVec(t), vec0, vec1);
307 }
308 
lerp(const floatInVec & t,const Vector3 & vec0,const Vector3 & vec1)309 VECTORMATH_FORCE_INLINE const Vector3 lerp(const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1)
310 {
311 	return (vec0 + ((vec1 - vec0) * t));
312 }
313 
slerp(float t,const Vector3 & unitVec0,const Vector3 & unitVec1)314 VECTORMATH_FORCE_INLINE const Vector3 slerp(float t, const Vector3 &unitVec0, const Vector3 &unitVec1)
315 {
316 	return slerp(floatInVec(t), unitVec0, unitVec1);
317 }
318 
slerp(const floatInVec & t,const Vector3 & unitVec0,const Vector3 & unitVec1)319 VECTORMATH_FORCE_INLINE const Vector3 slerp(const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1)
320 {
321 	__m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
322 	cosAngle = _vmathVfDot3(unitVec0.get128(), unitVec1.get128());
323 	__m128 selectMask = _mm_cmpgt_ps(_mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle);
324 	angle = acosf4(cosAngle);
325 	tttt = t.get128();
326 	oneMinusT = _mm_sub_ps(_mm_set1_ps(1.0f), tttt);
327 	angles = _mm_unpacklo_ps(_mm_set1_ps(1.0f), tttt);  // angles = 1, t, 1, t
328 	angles = _mm_unpacklo_ps(angles, oneMinusT);        // angles = 1, 1-t, t, 1-t
329 	angles = _mm_mul_ps(angles, angle);
330 	sines = sinf4(angles);
331 	scales = _mm_div_ps(sines, vec_splat(sines, 0));
332 	scale0 = vec_sel(oneMinusT, vec_splat(scales, 1), selectMask);
333 	scale1 = vec_sel(tttt, vec_splat(scales, 2), selectMask);
334 	return Vector3(vec_madd(unitVec0.get128(), scale0, _mm_mul_ps(unitVec1.get128(), scale1)));
335 }
336 
get128()337 VECTORMATH_FORCE_INLINE __m128 Vector3::get128() const
338 {
339 	return mVec128;
340 }
341 
loadXYZ(Point3 & vec,const float * fptr)342 VECTORMATH_FORCE_INLINE void loadXYZ(Point3 &vec, const float *fptr)
343 {
344 #ifdef USE_SSE3_LDDQU
345 	vec = Point3(SSEFloat(_mm_lddqu_si128((const __m128i *)((float *)(fptr)))).m128);
346 #else
347 	SSEFloat fl;
348 	fl.f[0] = fptr[0];
349 	fl.f[1] = fptr[1];
350 	fl.f[2] = fptr[2];
351 	fl.f[3] = fptr[3];
352 	vec = Point3(fl.m128);
353 #endif  //USE_SSE3_LDDQU
354 }
355 
loadXYZ(Vector3 & vec,const float * fptr)356 VECTORMATH_FORCE_INLINE void loadXYZ(Vector3 &vec, const float *fptr)
357 {
358 #ifdef USE_SSE3_LDDQU
359 	vec = Vector3(SSEFloat(_mm_lddqu_si128((const __m128i *)((float *)(fptr)))).m128);
360 #else
361 	SSEFloat fl;
362 	fl.f[0] = fptr[0];
363 	fl.f[1] = fptr[1];
364 	fl.f[2] = fptr[2];
365 	fl.f[3] = fptr[3];
366 	vec = Vector3(fl.m128);
367 #endif  //USE_SSE3_LDDQU
368 }
369 
storeXYZ(const Vector3 & vec,__m128 * quad)370 VECTORMATH_FORCE_INLINE void storeXYZ(const Vector3 &vec, __m128 *quad)
371 {
372 	__m128 dstVec = *quad;
373 	VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff};  // TODO: Centralize
374 	dstVec = vec_sel(vec.get128(), dstVec, sw);
375 	*quad = dstVec;
376 }
377 
storeXYZ(const Point3 & vec,float * fptr)378 VECTORMATH_FORCE_INLINE void storeXYZ(const Point3 &vec, float *fptr)
379 {
380 	fptr[0] = vec.getX();
381 	fptr[1] = vec.getY();
382 	fptr[2] = vec.getZ();
383 }
384 
storeXYZ(const Vector3 & vec,float * fptr)385 VECTORMATH_FORCE_INLINE void storeXYZ(const Vector3 &vec, float *fptr)
386 {
387 	fptr[0] = vec.getX();
388 	fptr[1] = vec.getY();
389 	fptr[2] = vec.getZ();
390 }
391 
loadXYZArray(Vector3 & vec0,Vector3 & vec1,Vector3 & vec2,Vector3 & vec3,const __m128 * threeQuads)392 VECTORMATH_FORCE_INLINE void loadXYZArray(Vector3 &vec0, Vector3 &vec1, Vector3 &vec2, Vector3 &vec3, const __m128 *threeQuads)
393 {
394 	const float *quads = (float *)threeQuads;
395 	vec0 = Vector3(_mm_load_ps(quads));
396 	vec1 = Vector3(_mm_loadu_ps(quads + 3));
397 	vec2 = Vector3(_mm_loadu_ps(quads + 6));
398 	vec3 = Vector3(_mm_loadu_ps(quads + 9));
399 }
400 
storeXYZArray(const Vector3 & vec0,const Vector3 & vec1,const Vector3 & vec2,const Vector3 & vec3,__m128 * threeQuads)401 VECTORMATH_FORCE_INLINE void storeXYZArray(const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 *threeQuads)
402 {
403 	__m128 xxxx = _mm_shuffle_ps(vec1.get128(), vec1.get128(), _MM_SHUFFLE(0, 0, 0, 0));
404 	__m128 zzzz = _mm_shuffle_ps(vec2.get128(), vec2.get128(), _MM_SHUFFLE(2, 2, 2, 2));
405 	VM_ATTRIBUTE_ALIGN16 unsigned int xsw[4] = {0, 0, 0, 0xffffffff};
406 	VM_ATTRIBUTE_ALIGN16 unsigned int zsw[4] = {0xffffffff, 0, 0, 0};
407 	threeQuads[0] = vec_sel(vec0.get128(), xxxx, xsw);
408 	threeQuads[1] = _mm_shuffle_ps(vec1.get128(), vec2.get128(), _MM_SHUFFLE(1, 0, 2, 1));
409 	threeQuads[2] = vec_sel(_mm_shuffle_ps(vec3.get128(), vec3.get128(), _MM_SHUFFLE(2, 1, 0, 3)), zzzz, zsw);
410 }
411 /*
412 VECTORMATH_FORCE_INLINE void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads )
413 {
414 	assert(0);
415 #if 0
416     __m128 xyz0[3];
417     __m128 xyz1[3];
418     storeXYZArray( vec0, vec1, vec2, vec3, xyz0 );
419     storeXYZArray( vec4, vec5, vec6, vec7, xyz1 );
420     threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
421     threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
422     threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
423 #endif
424 }
425 */
426 VECTORMATH_FORCE_INLINE Vector3 &Vector3::operator=(const Vector3 &vec)
427 {
428 	mVec128 = vec.mVec128;
429 	return *this;
430 }
431 
setX(float _x)432 VECTORMATH_FORCE_INLINE Vector3 &Vector3::setX(float _x)
433 {
434 	_vmathVfSetElement(mVec128, _x, 0);
435 	return *this;
436 }
437 
setX(const floatInVec & _x)438 VECTORMATH_FORCE_INLINE Vector3 &Vector3::setX(const floatInVec &_x)
439 {
440 	mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
441 	return *this;
442 }
443 
getX()444 VECTORMATH_FORCE_INLINE const floatInVec Vector3::getX() const
445 {
446 	return floatInVec(mVec128, 0);
447 }
448 
setY(float _y)449 VECTORMATH_FORCE_INLINE Vector3 &Vector3::setY(float _y)
450 {
451 	_vmathVfSetElement(mVec128, _y, 1);
452 	return *this;
453 }
454 
setY(const floatInVec & _y)455 VECTORMATH_FORCE_INLINE Vector3 &Vector3::setY(const floatInVec &_y)
456 {
457 	mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
458 	return *this;
459 }
460 
getY()461 VECTORMATH_FORCE_INLINE const floatInVec Vector3::getY() const
462 {
463 	return floatInVec(mVec128, 1);
464 }
465 
setZ(float _z)466 VECTORMATH_FORCE_INLINE Vector3 &Vector3::setZ(float _z)
467 {
468 	_vmathVfSetElement(mVec128, _z, 2);
469 	return *this;
470 }
471 
setZ(const floatInVec & _z)472 VECTORMATH_FORCE_INLINE Vector3 &Vector3::setZ(const floatInVec &_z)
473 {
474 	mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
475 	return *this;
476 }
477 
getZ()478 VECTORMATH_FORCE_INLINE const floatInVec Vector3::getZ() const
479 {
480 	return floatInVec(mVec128, 2);
481 }
482 
setElem(int idx,float value)483 VECTORMATH_FORCE_INLINE Vector3 &Vector3::setElem(int idx, float value)
484 {
485 	_vmathVfSetElement(mVec128, value, idx);
486 	return *this;
487 }
488 
setElem(int idx,const floatInVec & value)489 VECTORMATH_FORCE_INLINE Vector3 &Vector3::setElem(int idx, const floatInVec &value)
490 {
491 	mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
492 	return *this;
493 }
494 
getElem(int idx)495 VECTORMATH_FORCE_INLINE const floatInVec Vector3::getElem(int idx) const
496 {
497 	return floatInVec(mVec128, idx);
498 }
499 
500 VECTORMATH_FORCE_INLINE VecIdx Vector3::operator[](int idx)
501 {
502 	return VecIdx(mVec128, idx);
503 }
504 
505 VECTORMATH_FORCE_INLINE const floatInVec Vector3::operator[](int idx) const
506 {
507 	return floatInVec(mVec128, idx);
508 }
509 
510 VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator+(const Vector3 &vec) const
511 {
512 	return Vector3(_mm_add_ps(mVec128, vec.mVec128));
513 }
514 
515 VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator-(const Vector3 &vec) const
516 {
517 	return Vector3(_mm_sub_ps(mVec128, vec.mVec128));
518 }
519 
520 VECTORMATH_FORCE_INLINE const Point3 Vector3::operator+(const Point3 &pnt) const
521 {
522 	return Point3(_mm_add_ps(mVec128, pnt.get128()));
523 }
524 
525 VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator*(float scalar) const
526 {
527 	return *this * floatInVec(scalar);
528 }
529 
530 VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator*(const floatInVec &scalar) const
531 {
532 	return Vector3(_mm_mul_ps(mVec128, scalar.get128()));
533 }
534 
535 VECTORMATH_FORCE_INLINE Vector3 &Vector3::operator+=(const Vector3 &vec)
536 {
537 	*this = *this + vec;
538 	return *this;
539 }
540 
541 VECTORMATH_FORCE_INLINE Vector3 &Vector3::operator-=(const Vector3 &vec)
542 {
543 	*this = *this - vec;
544 	return *this;
545 }
546 
547 VECTORMATH_FORCE_INLINE Vector3 &Vector3::operator*=(float scalar)
548 {
549 	*this = *this * scalar;
550 	return *this;
551 }
552 
553 VECTORMATH_FORCE_INLINE Vector3 &Vector3::operator*=(const floatInVec &scalar)
554 {
555 	*this = *this * scalar;
556 	return *this;
557 }
558 
559 VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator/(float scalar) const
560 {
561 	return *this / floatInVec(scalar);
562 }
563 
564 VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator/(const floatInVec &scalar) const
565 {
566 	return Vector3(_mm_div_ps(mVec128, scalar.get128()));
567 }
568 
569 VECTORMATH_FORCE_INLINE Vector3 &Vector3::operator/=(float scalar)
570 {
571 	*this = *this / scalar;
572 	return *this;
573 }
574 
575 VECTORMATH_FORCE_INLINE Vector3 &Vector3::operator/=(const floatInVec &scalar)
576 {
577 	*this = *this / scalar;
578 	return *this;
579 }
580 
581 VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator-() const
582 {
583 	//return Vector3(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) );
584 
585 	VM_ATTRIBUTE_ALIGN16 static const int array[] = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
586 	__m128 NEG_MASK = SSEFloat(*(const vec_float4 *)array).vf;
587 	return Vector3(_mm_xor_ps(get128(), NEG_MASK));
588 }
589 
590 VECTORMATH_FORCE_INLINE const Vector3 operator*(float scalar, const Vector3 &vec)
591 {
592 	return floatInVec(scalar) * vec;
593 }
594 
595 VECTORMATH_FORCE_INLINE const Vector3 operator*(const floatInVec &scalar, const Vector3 &vec)
596 {
597 	return vec * scalar;
598 }
599 
mulPerElem(const Vector3 & vec0,const Vector3 & vec1)600 VECTORMATH_FORCE_INLINE const Vector3 mulPerElem(const Vector3 &vec0, const Vector3 &vec1)
601 {
602 	return Vector3(_mm_mul_ps(vec0.get128(), vec1.get128()));
603 }
604 
divPerElem(const Vector3 & vec0,const Vector3 & vec1)605 VECTORMATH_FORCE_INLINE const Vector3 divPerElem(const Vector3 &vec0, const Vector3 &vec1)
606 {
607 	return Vector3(_mm_div_ps(vec0.get128(), vec1.get128()));
608 }
609 
recipPerElem(const Vector3 & vec)610 VECTORMATH_FORCE_INLINE const Vector3 recipPerElem(const Vector3 &vec)
611 {
612 	return Vector3(_mm_rcp_ps(vec.get128()));
613 }
614 
absPerElem(const Vector3 & vec)615 VECTORMATH_FORCE_INLINE const Vector3 absPerElem(const Vector3 &vec)
616 {
617 	return Vector3(fabsf4(vec.get128()));
618 }
619 
copySignPerElem(const Vector3 & vec0,const Vector3 & vec1)620 VECTORMATH_FORCE_INLINE const Vector3 copySignPerElem(const Vector3 &vec0, const Vector3 &vec1)
621 {
622 	__m128 vmask = toM128(0x7fffffff);
623 	return Vector3(_mm_or_ps(
624 		_mm_and_ps(vmask, vec0.get128()),       // Value
625 		_mm_andnot_ps(vmask, vec1.get128())));  // Signs
626 }
627 
maxPerElem(const Vector3 & vec0,const Vector3 & vec1)628 VECTORMATH_FORCE_INLINE const Vector3 maxPerElem(const Vector3 &vec0, const Vector3 &vec1)
629 {
630 	return Vector3(_mm_max_ps(vec0.get128(), vec1.get128()));
631 }
632 
maxElem(const Vector3 & vec)633 VECTORMATH_FORCE_INLINE const floatInVec maxElem(const Vector3 &vec)
634 {
635 	return floatInVec(_mm_max_ps(_mm_max_ps(vec_splat(vec.get128(), 0), vec_splat(vec.get128(), 1)), vec_splat(vec.get128(), 2)));
636 }
637 
minPerElem(const Vector3 & vec0,const Vector3 & vec1)638 VECTORMATH_FORCE_INLINE const Vector3 minPerElem(const Vector3 &vec0, const Vector3 &vec1)
639 {
640 	return Vector3(_mm_min_ps(vec0.get128(), vec1.get128()));
641 }
642 
minElem(const Vector3 & vec)643 VECTORMATH_FORCE_INLINE const floatInVec minElem(const Vector3 &vec)
644 {
645 	return floatInVec(_mm_min_ps(_mm_min_ps(vec_splat(vec.get128(), 0), vec_splat(vec.get128(), 1)), vec_splat(vec.get128(), 2)));
646 }
647 
sum(const Vector3 & vec)648 VECTORMATH_FORCE_INLINE const floatInVec sum(const Vector3 &vec)
649 {
650 	return floatInVec(_mm_add_ps(_mm_add_ps(vec_splat(vec.get128(), 0), vec_splat(vec.get128(), 1)), vec_splat(vec.get128(), 2)));
651 }
652 
dot(const Vector3 & vec0,const Vector3 & vec1)653 VECTORMATH_FORCE_INLINE const floatInVec dot(const Vector3 &vec0, const Vector3 &vec1)
654 {
655 	return floatInVec(_vmathVfDot3(vec0.get128(), vec1.get128()), 0);
656 }
657 
lengthSqr(const Vector3 & vec)658 VECTORMATH_FORCE_INLINE const floatInVec lengthSqr(const Vector3 &vec)
659 {
660 	return floatInVec(_vmathVfDot3(vec.get128(), vec.get128()), 0);
661 }
662 
length(const Vector3 & vec)663 VECTORMATH_FORCE_INLINE const floatInVec length(const Vector3 &vec)
664 {
665 	return floatInVec(_mm_sqrt_ps(_vmathVfDot3(vec.get128(), vec.get128())), 0);
666 }
667 
normalizeApprox(const Vector3 & vec)668 VECTORMATH_FORCE_INLINE const Vector3 normalizeApprox(const Vector3 &vec)
669 {
670 	return Vector3(_mm_mul_ps(vec.get128(), _mm_rsqrt_ps(_vmathVfDot3(vec.get128(), vec.get128()))));
671 }
672 
normalize(const Vector3 & vec)673 VECTORMATH_FORCE_INLINE const Vector3 normalize(const Vector3 &vec)
674 {
675 	return Vector3(_mm_mul_ps(vec.get128(), newtonrapson_rsqrt4(_vmathVfDot3(vec.get128(), vec.get128()))));
676 }
677 
cross(const Vector3 & vec0,const Vector3 & vec1)678 VECTORMATH_FORCE_INLINE const Vector3 cross(const Vector3 &vec0, const Vector3 &vec1)
679 {
680 	return Vector3(_vmathVfCross(vec0.get128(), vec1.get128()));
681 }
682 
select(const Vector3 & vec0,const Vector3 & vec1,bool select1)683 VECTORMATH_FORCE_INLINE const Vector3 select(const Vector3 &vec0, const Vector3 &vec1, bool select1)
684 {
685 	return select(vec0, vec1, boolInVec(select1));
686 }
687 
select(const Vector4 & vec0,const Vector4 & vec1,const boolInVec & select1)688 VECTORMATH_FORCE_INLINE const Vector4 select(const Vector4 &vec0, const Vector4 &vec1, const boolInVec &select1)
689 {
690 	return Vector4(vec_sel(vec0.get128(), vec1.get128(), select1.get128()));
691 }
692 
693 #ifdef _VECTORMATH_DEBUG
694 
print(const Vector3 & vec)695 VECTORMATH_FORCE_INLINE void print(const Vector3 &vec)
696 {
697 	union {
698 		__m128 v;
699 		float s[4];
700 	} tmp;
701 	tmp.v = vec.get128();
702 	printf("( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2]);
703 }
704 
print(const Vector3 & vec,const char * name)705 VECTORMATH_FORCE_INLINE void print(const Vector3 &vec, const char *name)
706 {
707 	union {
708 		__m128 v;
709 		float s[4];
710 	} tmp;
711 	tmp.v = vec.get128();
712 	printf("%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2]);
713 }
714 
715 #endif
716 
Vector4(float _x,float _y,float _z,float _w)717 VECTORMATH_FORCE_INLINE Vector4::Vector4(float _x, float _y, float _z, float _w)
718 {
719 	mVec128 = _mm_setr_ps(_x, _y, _z, _w);
720 }
721 
Vector4(const floatInVec & _x,const floatInVec & _y,const floatInVec & _z,const floatInVec & _w)722 VECTORMATH_FORCE_INLINE Vector4::Vector4(const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, const floatInVec &_w)
723 {
724 	mVec128 = _mm_unpacklo_ps(
725 		_mm_unpacklo_ps(_x.get128(), _z.get128()),
726 		_mm_unpacklo_ps(_y.get128(), _w.get128()));
727 }
728 
Vector4(const Vector3 & xyz,float _w)729 VECTORMATH_FORCE_INLINE Vector4::Vector4(const Vector3 &xyz, float _w)
730 {
731 	mVec128 = xyz.get128();
732 	_vmathVfSetElement(mVec128, _w, 3);
733 }
734 
Vector4(const Vector3 & xyz,const floatInVec & _w)735 VECTORMATH_FORCE_INLINE Vector4::Vector4(const Vector3 &xyz, const floatInVec &_w)
736 {
737 	mVec128 = xyz.get128();
738 	mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
739 }
740 
Vector4(const Vector3 & vec)741 VECTORMATH_FORCE_INLINE Vector4::Vector4(const Vector3 &vec)
742 {
743 	mVec128 = vec.get128();
744 	mVec128 = _vmathVfInsert(mVec128, _mm_setzero_ps(), 3);
745 }
746 
Vector4(const Point3 & pnt)747 VECTORMATH_FORCE_INLINE Vector4::Vector4(const Point3 &pnt)
748 {
749 	mVec128 = pnt.get128();
750 	mVec128 = _vmathVfInsert(mVec128, _mm_set1_ps(1.0f), 3);
751 }
752 
Vector4(const Quat & quat)753 VECTORMATH_FORCE_INLINE Vector4::Vector4(const Quat &quat)
754 {
755 	mVec128 = quat.get128();
756 }
757 
Vector4(float scalar)758 VECTORMATH_FORCE_INLINE Vector4::Vector4(float scalar)
759 {
760 	mVec128 = floatInVec(scalar).get128();
761 }
762 
Vector4(const floatInVec & scalar)763 VECTORMATH_FORCE_INLINE Vector4::Vector4(const floatInVec &scalar)
764 {
765 	mVec128 = scalar.get128();
766 }
767 
Vector4(__m128 vf4)768 VECTORMATH_FORCE_INLINE Vector4::Vector4(__m128 vf4)
769 {
770 	mVec128 = vf4;
771 }
772 
xAxis()773 VECTORMATH_FORCE_INLINE const Vector4 Vector4::xAxis()
774 {
775 	return Vector4(_VECTORMATH_UNIT_1000);
776 }
777 
yAxis()778 VECTORMATH_FORCE_INLINE const Vector4 Vector4::yAxis()
779 {
780 	return Vector4(_VECTORMATH_UNIT_0100);
781 }
782 
zAxis()783 VECTORMATH_FORCE_INLINE const Vector4 Vector4::zAxis()
784 {
785 	return Vector4(_VECTORMATH_UNIT_0010);
786 }
787 
wAxis()788 VECTORMATH_FORCE_INLINE const Vector4 Vector4::wAxis()
789 {
790 	return Vector4(_VECTORMATH_UNIT_0001);
791 }
792 
lerp(float t,const Vector4 & vec0,const Vector4 & vec1)793 VECTORMATH_FORCE_INLINE const Vector4 lerp(float t, const Vector4 &vec0, const Vector4 &vec1)
794 {
795 	return lerp(floatInVec(t), vec0, vec1);
796 }
797 
lerp(const floatInVec & t,const Vector4 & vec0,const Vector4 & vec1)798 VECTORMATH_FORCE_INLINE const Vector4 lerp(const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1)
799 {
800 	return (vec0 + ((vec1 - vec0) * t));
801 }
802 
slerp(float t,const Vector4 & unitVec0,const Vector4 & unitVec1)803 VECTORMATH_FORCE_INLINE const Vector4 slerp(float t, const Vector4 &unitVec0, const Vector4 &unitVec1)
804 {
805 	return slerp(floatInVec(t), unitVec0, unitVec1);
806 }
807 
slerp(const floatInVec & t,const Vector4 & unitVec0,const Vector4 & unitVec1)808 VECTORMATH_FORCE_INLINE const Vector4 slerp(const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1)
809 {
810 	__m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
811 	cosAngle = _vmathVfDot4(unitVec0.get128(), unitVec1.get128());
812 	__m128 selectMask = _mm_cmpgt_ps(_mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle);
813 	angle = acosf4(cosAngle);
814 	tttt = t.get128();
815 	oneMinusT = _mm_sub_ps(_mm_set1_ps(1.0f), tttt);
816 	angles = _mm_unpacklo_ps(_mm_set1_ps(1.0f), tttt);  // angles = 1, t, 1, t
817 	angles = _mm_unpacklo_ps(angles, oneMinusT);        // angles = 1, 1-t, t, 1-t
818 	angles = _mm_mul_ps(angles, angle);
819 	sines = sinf4(angles);
820 	scales = _mm_div_ps(sines, vec_splat(sines, 0));
821 	scale0 = vec_sel(oneMinusT, vec_splat(scales, 1), selectMask);
822 	scale1 = vec_sel(tttt, vec_splat(scales, 2), selectMask);
823 	return Vector4(vec_madd(unitVec0.get128(), scale0, _mm_mul_ps(unitVec1.get128(), scale1)));
824 }
825 
get128()826 VECTORMATH_FORCE_INLINE __m128 Vector4::get128() const
827 {
828 	return mVec128;
829 }
830 /*
831 VECTORMATH_FORCE_INLINE void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads )
832 {
833     twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128());
834     twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128());
835 }
836 */
837 VECTORMATH_FORCE_INLINE Vector4 &Vector4::operator=(const Vector4 &vec)
838 {
839 	mVec128 = vec.mVec128;
840 	return *this;
841 }
842 
setXYZ(const Vector3 & vec)843 VECTORMATH_FORCE_INLINE Vector4 &Vector4::setXYZ(const Vector3 &vec)
844 {
845 	VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff};
846 	mVec128 = vec_sel(vec.get128(), mVec128, sw);
847 	return *this;
848 }
849 
getXYZ()850 VECTORMATH_FORCE_INLINE const Vector3 Vector4::getXYZ() const
851 {
852 	return Vector3(mVec128);
853 }
854 
setX(float _x)855 VECTORMATH_FORCE_INLINE Vector4 &Vector4::setX(float _x)
856 {
857 	_vmathVfSetElement(mVec128, _x, 0);
858 	return *this;
859 }
860 
setX(const floatInVec & _x)861 VECTORMATH_FORCE_INLINE Vector4 &Vector4::setX(const floatInVec &_x)
862 {
863 	mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
864 	return *this;
865 }
866 
getX()867 VECTORMATH_FORCE_INLINE const floatInVec Vector4::getX() const
868 {
869 	return floatInVec(mVec128, 0);
870 }
871 
setY(float _y)872 VECTORMATH_FORCE_INLINE Vector4 &Vector4::setY(float _y)
873 {
874 	_vmathVfSetElement(mVec128, _y, 1);
875 	return *this;
876 }
877 
setY(const floatInVec & _y)878 VECTORMATH_FORCE_INLINE Vector4 &Vector4::setY(const floatInVec &_y)
879 {
880 	mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
881 	return *this;
882 }
883 
getY()884 VECTORMATH_FORCE_INLINE const floatInVec Vector4::getY() const
885 {
886 	return floatInVec(mVec128, 1);
887 }
888 
setZ(float _z)889 VECTORMATH_FORCE_INLINE Vector4 &Vector4::setZ(float _z)
890 {
891 	_vmathVfSetElement(mVec128, _z, 2);
892 	return *this;
893 }
894 
setZ(const floatInVec & _z)895 VECTORMATH_FORCE_INLINE Vector4 &Vector4::setZ(const floatInVec &_z)
896 {
897 	mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
898 	return *this;
899 }
900 
getZ()901 VECTORMATH_FORCE_INLINE const floatInVec Vector4::getZ() const
902 {
903 	return floatInVec(mVec128, 2);
904 }
905 
setW(float _w)906 VECTORMATH_FORCE_INLINE Vector4 &Vector4::setW(float _w)
907 {
908 	_vmathVfSetElement(mVec128, _w, 3);
909 	return *this;
910 }
911 
setW(const floatInVec & _w)912 VECTORMATH_FORCE_INLINE Vector4 &Vector4::setW(const floatInVec &_w)
913 {
914 	mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
915 	return *this;
916 }
917 
getW()918 VECTORMATH_FORCE_INLINE const floatInVec Vector4::getW() const
919 {
920 	return floatInVec(mVec128, 3);
921 }
922 
setElem(int idx,float value)923 VECTORMATH_FORCE_INLINE Vector4 &Vector4::setElem(int idx, float value)
924 {
925 	_vmathVfSetElement(mVec128, value, idx);
926 	return *this;
927 }
928 
setElem(int idx,const floatInVec & value)929 VECTORMATH_FORCE_INLINE Vector4 &Vector4::setElem(int idx, const floatInVec &value)
930 {
931 	mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
932 	return *this;
933 }
934 
getElem(int idx)935 VECTORMATH_FORCE_INLINE const floatInVec Vector4::getElem(int idx) const
936 {
937 	return floatInVec(mVec128, idx);
938 }
939 
940 VECTORMATH_FORCE_INLINE VecIdx Vector4::operator[](int idx)
941 {
942 	return VecIdx(mVec128, idx);
943 }
944 
945 VECTORMATH_FORCE_INLINE const floatInVec Vector4::operator[](int idx) const
946 {
947 	return floatInVec(mVec128, idx);
948 }
949 
950 VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator+(const Vector4 &vec) const
951 {
952 	return Vector4(_mm_add_ps(mVec128, vec.mVec128));
953 }
954 
955 VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator-(const Vector4 &vec) const
956 {
957 	return Vector4(_mm_sub_ps(mVec128, vec.mVec128));
958 }
959 
960 VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator*(float scalar) const
961 {
962 	return *this * floatInVec(scalar);
963 }
964 
965 VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator*(const floatInVec &scalar) const
966 {
967 	return Vector4(_mm_mul_ps(mVec128, scalar.get128()));
968 }
969 
970 VECTORMATH_FORCE_INLINE Vector4 &Vector4::operator+=(const Vector4 &vec)
971 {
972 	*this = *this + vec;
973 	return *this;
974 }
975 
976 VECTORMATH_FORCE_INLINE Vector4 &Vector4::operator-=(const Vector4 &vec)
977 {
978 	*this = *this - vec;
979 	return *this;
980 }
981 
982 VECTORMATH_FORCE_INLINE Vector4 &Vector4::operator*=(float scalar)
983 {
984 	*this = *this * scalar;
985 	return *this;
986 }
987 
988 VECTORMATH_FORCE_INLINE Vector4 &Vector4::operator*=(const floatInVec &scalar)
989 {
990 	*this = *this * scalar;
991 	return *this;
992 }
993 
994 VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator/(float scalar) const
995 {
996 	return *this / floatInVec(scalar);
997 }
998 
999 VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator/(const floatInVec &scalar) const
1000 {
1001 	return Vector4(_mm_div_ps(mVec128, scalar.get128()));
1002 }
1003 
1004 VECTORMATH_FORCE_INLINE Vector4 &Vector4::operator/=(float scalar)
1005 {
1006 	*this = *this / scalar;
1007 	return *this;
1008 }
1009 
1010 VECTORMATH_FORCE_INLINE Vector4 &Vector4::operator/=(const floatInVec &scalar)
1011 {
1012 	*this = *this / scalar;
1013 	return *this;
1014 }
1015 
1016 VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator-() const
1017 {
1018 	return Vector4(_mm_sub_ps(_mm_setzero_ps(), mVec128));
1019 }
1020 
1021 VECTORMATH_FORCE_INLINE const Vector4 operator*(float scalar, const Vector4 &vec)
1022 {
1023 	return floatInVec(scalar) * vec;
1024 }
1025 
1026 VECTORMATH_FORCE_INLINE const Vector4 operator*(const floatInVec &scalar, const Vector4 &vec)
1027 {
1028 	return vec * scalar;
1029 }
1030 
mulPerElem(const Vector4 & vec0,const Vector4 & vec1)1031 VECTORMATH_FORCE_INLINE const Vector4 mulPerElem(const Vector4 &vec0, const Vector4 &vec1)
1032 {
1033 	return Vector4(_mm_mul_ps(vec0.get128(), vec1.get128()));
1034 }
1035 
divPerElem(const Vector4 & vec0,const Vector4 & vec1)1036 VECTORMATH_FORCE_INLINE const Vector4 divPerElem(const Vector4 &vec0, const Vector4 &vec1)
1037 {
1038 	return Vector4(_mm_div_ps(vec0.get128(), vec1.get128()));
1039 }
1040 
recipPerElem(const Vector4 & vec)1041 VECTORMATH_FORCE_INLINE const Vector4 recipPerElem(const Vector4 &vec)
1042 {
1043 	return Vector4(_mm_rcp_ps(vec.get128()));
1044 }
1045 
absPerElem(const Vector4 & vec)1046 VECTORMATH_FORCE_INLINE const Vector4 absPerElem(const Vector4 &vec)
1047 {
1048 	return Vector4(fabsf4(vec.get128()));
1049 }
1050 
copySignPerElem(const Vector4 & vec0,const Vector4 & vec1)1051 VECTORMATH_FORCE_INLINE const Vector4 copySignPerElem(const Vector4 &vec0, const Vector4 &vec1)
1052 {
1053 	__m128 vmask = toM128(0x7fffffff);
1054 	return Vector4(_mm_or_ps(
1055 		_mm_and_ps(vmask, vec0.get128()),       // Value
1056 		_mm_andnot_ps(vmask, vec1.get128())));  // Signs
1057 }
1058 
maxPerElem(const Vector4 & vec0,const Vector4 & vec1)1059 VECTORMATH_FORCE_INLINE const Vector4 maxPerElem(const Vector4 &vec0, const Vector4 &vec1)
1060 {
1061 	return Vector4(_mm_max_ps(vec0.get128(), vec1.get128()));
1062 }
1063 
maxElem(const Vector4 & vec)1064 VECTORMATH_FORCE_INLINE const floatInVec maxElem(const Vector4 &vec)
1065 {
1066 	return floatInVec(_mm_max_ps(
1067 		_mm_max_ps(vec_splat(vec.get128(), 0), vec_splat(vec.get128(), 1)),
1068 		_mm_max_ps(vec_splat(vec.get128(), 2), vec_splat(vec.get128(), 3))));
1069 }
1070 
minPerElem(const Vector4 & vec0,const Vector4 & vec1)1071 VECTORMATH_FORCE_INLINE const Vector4 minPerElem(const Vector4 &vec0, const Vector4 &vec1)
1072 {
1073 	return Vector4(_mm_min_ps(vec0.get128(), vec1.get128()));
1074 }
1075 
minElem(const Vector4 & vec)1076 VECTORMATH_FORCE_INLINE const floatInVec minElem(const Vector4 &vec)
1077 {
1078 	return floatInVec(_mm_min_ps(
1079 		_mm_min_ps(vec_splat(vec.get128(), 0), vec_splat(vec.get128(), 1)),
1080 		_mm_min_ps(vec_splat(vec.get128(), 2), vec_splat(vec.get128(), 3))));
1081 }
1082 
sum(const Vector4 & vec)1083 VECTORMATH_FORCE_INLINE const floatInVec sum(const Vector4 &vec)
1084 {
1085 	return floatInVec(_mm_add_ps(
1086 		_mm_add_ps(vec_splat(vec.get128(), 0), vec_splat(vec.get128(), 1)),
1087 		_mm_add_ps(vec_splat(vec.get128(), 2), vec_splat(vec.get128(), 3))));
1088 }
1089 
dot(const Vector4 & vec0,const Vector4 & vec1)1090 VECTORMATH_FORCE_INLINE const floatInVec dot(const Vector4 &vec0, const Vector4 &vec1)
1091 {
1092 	return floatInVec(_vmathVfDot4(vec0.get128(), vec1.get128()), 0);
1093 }
1094 
lengthSqr(const Vector4 & vec)1095 VECTORMATH_FORCE_INLINE const floatInVec lengthSqr(const Vector4 &vec)
1096 {
1097 	return floatInVec(_vmathVfDot4(vec.get128(), vec.get128()), 0);
1098 }
1099 
length(const Vector4 & vec)1100 VECTORMATH_FORCE_INLINE const floatInVec length(const Vector4 &vec)
1101 {
1102 	return floatInVec(_mm_sqrt_ps(_vmathVfDot4(vec.get128(), vec.get128())), 0);
1103 }
1104 
normalizeApprox(const Vector4 & vec)1105 VECTORMATH_FORCE_INLINE const Vector4 normalizeApprox(const Vector4 &vec)
1106 {
1107 	return Vector4(_mm_mul_ps(vec.get128(), _mm_rsqrt_ps(_vmathVfDot4(vec.get128(), vec.get128()))));
1108 }
1109 
normalize(const Vector4 & vec)1110 VECTORMATH_FORCE_INLINE const Vector4 normalize(const Vector4 &vec)
1111 {
1112 	return Vector4(_mm_mul_ps(vec.get128(), newtonrapson_rsqrt4(_vmathVfDot4(vec.get128(), vec.get128()))));
1113 }
1114 
select(const Vector4 & vec0,const Vector4 & vec1,bool select1)1115 VECTORMATH_FORCE_INLINE const Vector4 select(const Vector4 &vec0, const Vector4 &vec1, bool select1)
1116 {
1117 	return select(vec0, vec1, boolInVec(select1));
1118 }
1119 
1120 #ifdef _VECTORMATH_DEBUG
1121 
print(const Vector4 & vec)1122 VECTORMATH_FORCE_INLINE void print(const Vector4 &vec)
1123 {
1124 	union {
1125 		__m128 v;
1126 		float s[4];
1127 	} tmp;
1128 	tmp.v = vec.get128();
1129 	printf("( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3]);
1130 }
1131 
print(const Vector4 & vec,const char * name)1132 VECTORMATH_FORCE_INLINE void print(const Vector4 &vec, const char *name)
1133 {
1134 	union {
1135 		__m128 v;
1136 		float s[4];
1137 	} tmp;
1138 	tmp.v = vec.get128();
1139 	printf("%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3]);
1140 }
1141 
1142 #endif
1143 
Point3(float _x,float _y,float _z)1144 VECTORMATH_FORCE_INLINE Point3::Point3(float _x, float _y, float _z)
1145 {
1146 	mVec128 = _mm_setr_ps(_x, _y, _z, 0.0f);
1147 }
1148 
Point3(const floatInVec & _x,const floatInVec & _y,const floatInVec & _z)1149 VECTORMATH_FORCE_INLINE Point3::Point3(const floatInVec &_x, const floatInVec &_y, const floatInVec &_z)
1150 {
1151 	mVec128 = _mm_unpacklo_ps(_mm_unpacklo_ps(_x.get128(), _z.get128()), _y.get128());
1152 }
1153 
Point3(const Vector3 & vec)1154 VECTORMATH_FORCE_INLINE Point3::Point3(const Vector3 &vec)
1155 {
1156 	mVec128 = vec.get128();
1157 }
1158 
Point3(float scalar)1159 VECTORMATH_FORCE_INLINE Point3::Point3(float scalar)
1160 {
1161 	mVec128 = floatInVec(scalar).get128();
1162 }
1163 
Point3(const floatInVec & scalar)1164 VECTORMATH_FORCE_INLINE Point3::Point3(const floatInVec &scalar)
1165 {
1166 	mVec128 = scalar.get128();
1167 }
1168 
Point3(__m128 vf4)1169 VECTORMATH_FORCE_INLINE Point3::Point3(__m128 vf4)
1170 {
1171 	mVec128 = vf4;
1172 }
1173 
lerp(float t,const Point3 & pnt0,const Point3 & pnt1)1174 VECTORMATH_FORCE_INLINE const Point3 lerp(float t, const Point3 &pnt0, const Point3 &pnt1)
1175 {
1176 	return lerp(floatInVec(t), pnt0, pnt1);
1177 }
1178 
lerp(const floatInVec & t,const Point3 & pnt0,const Point3 & pnt1)1179 VECTORMATH_FORCE_INLINE const Point3 lerp(const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1)
1180 {
1181 	return (pnt0 + ((pnt1 - pnt0) * t));
1182 }
1183 
get128()1184 VECTORMATH_FORCE_INLINE __m128 Point3::get128() const
1185 {
1186 	return mVec128;
1187 }
1188 
storeXYZ(const Point3 & pnt,__m128 * quad)1189 VECTORMATH_FORCE_INLINE void storeXYZ(const Point3 &pnt, __m128 *quad)
1190 {
1191 	__m128 dstVec = *quad;
1192 	VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff};  // TODO: Centralize
1193 	dstVec = vec_sel(pnt.get128(), dstVec, sw);
1194 	*quad = dstVec;
1195 }
1196 
loadXYZArray(Point3 & pnt0,Point3 & pnt1,Point3 & pnt2,Point3 & pnt3,const __m128 * threeQuads)1197 VECTORMATH_FORCE_INLINE void loadXYZArray(Point3 &pnt0, Point3 &pnt1, Point3 &pnt2, Point3 &pnt3, const __m128 *threeQuads)
1198 {
1199 	const float *quads = (float *)threeQuads;
1200 	pnt0 = Point3(_mm_load_ps(quads));
1201 	pnt1 = Point3(_mm_loadu_ps(quads + 3));
1202 	pnt2 = Point3(_mm_loadu_ps(quads + 6));
1203 	pnt3 = Point3(_mm_loadu_ps(quads + 9));
1204 }
1205 
storeXYZArray(const Point3 & pnt0,const Point3 & pnt1,const Point3 & pnt2,const Point3 & pnt3,__m128 * threeQuads)1206 VECTORMATH_FORCE_INLINE void storeXYZArray(const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 *threeQuads)
1207 {
1208 	__m128 xxxx = _mm_shuffle_ps(pnt1.get128(), pnt1.get128(), _MM_SHUFFLE(0, 0, 0, 0));
1209 	__m128 zzzz = _mm_shuffle_ps(pnt2.get128(), pnt2.get128(), _MM_SHUFFLE(2, 2, 2, 2));
1210 	VM_ATTRIBUTE_ALIGN16 unsigned int xsw[4] = {0, 0, 0, 0xffffffff};
1211 	VM_ATTRIBUTE_ALIGN16 unsigned int zsw[4] = {0xffffffff, 0, 0, 0};
1212 	threeQuads[0] = vec_sel(pnt0.get128(), xxxx, xsw);
1213 	threeQuads[1] = _mm_shuffle_ps(pnt1.get128(), pnt2.get128(), _MM_SHUFFLE(1, 0, 2, 1));
1214 	threeQuads[2] = vec_sel(_mm_shuffle_ps(pnt3.get128(), pnt3.get128(), _MM_SHUFFLE(2, 1, 0, 3)), zzzz, zsw);
1215 }
1216 /*
1217 VECTORMATH_FORCE_INLINE void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads )
1218 {
1219 #if 0
1220     __m128 xyz0[3];
1221     __m128 xyz1[3];
1222     storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
1223     storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
1224     threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
1225     threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
1226     threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
1227 #else
1228 	assert(0);
1229 #endif
1230 }
1231 */
1232 VECTORMATH_FORCE_INLINE Point3 &Point3::operator=(const Point3 &pnt)
1233 {
1234 	mVec128 = pnt.mVec128;
1235 	return *this;
1236 }
1237 
setX(float _x)1238 VECTORMATH_FORCE_INLINE Point3 &Point3::setX(float _x)
1239 {
1240 	_vmathVfSetElement(mVec128, _x, 0);
1241 	return *this;
1242 }
1243 
setX(const floatInVec & _x)1244 VECTORMATH_FORCE_INLINE Point3 &Point3::setX(const floatInVec &_x)
1245 {
1246 	mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
1247 	return *this;
1248 }
1249 
getX()1250 VECTORMATH_FORCE_INLINE const floatInVec Point3::getX() const
1251 {
1252 	return floatInVec(mVec128, 0);
1253 }
1254 
setY(float _y)1255 VECTORMATH_FORCE_INLINE Point3 &Point3::setY(float _y)
1256 {
1257 	_vmathVfSetElement(mVec128, _y, 1);
1258 	return *this;
1259 }
1260 
setY(const floatInVec & _y)1261 VECTORMATH_FORCE_INLINE Point3 &Point3::setY(const floatInVec &_y)
1262 {
1263 	mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
1264 	return *this;
1265 }
1266 
getY()1267 VECTORMATH_FORCE_INLINE const floatInVec Point3::getY() const
1268 {
1269 	return floatInVec(mVec128, 1);
1270 }
1271 
setZ(float _z)1272 VECTORMATH_FORCE_INLINE Point3 &Point3::setZ(float _z)
1273 {
1274 	_vmathVfSetElement(mVec128, _z, 2);
1275 	return *this;
1276 }
1277 
setZ(const floatInVec & _z)1278 VECTORMATH_FORCE_INLINE Point3 &Point3::setZ(const floatInVec &_z)
1279 {
1280 	mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
1281 	return *this;
1282 }
1283 
getZ()1284 VECTORMATH_FORCE_INLINE const floatInVec Point3::getZ() const
1285 {
1286 	return floatInVec(mVec128, 2);
1287 }
1288 
setElem(int idx,float value)1289 VECTORMATH_FORCE_INLINE Point3 &Point3::setElem(int idx, float value)
1290 {
1291 	_vmathVfSetElement(mVec128, value, idx);
1292 	return *this;
1293 }
1294 
setElem(int idx,const floatInVec & value)1295 VECTORMATH_FORCE_INLINE Point3 &Point3::setElem(int idx, const floatInVec &value)
1296 {
1297 	mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
1298 	return *this;
1299 }
1300 
getElem(int idx)1301 VECTORMATH_FORCE_INLINE const floatInVec Point3::getElem(int idx) const
1302 {
1303 	return floatInVec(mVec128, idx);
1304 }
1305 
1306 VECTORMATH_FORCE_INLINE VecIdx Point3::operator[](int idx)
1307 {
1308 	return VecIdx(mVec128, idx);
1309 }
1310 
1311 VECTORMATH_FORCE_INLINE const floatInVec Point3::operator[](int idx) const
1312 {
1313 	return floatInVec(mVec128, idx);
1314 }
1315 
1316 VECTORMATH_FORCE_INLINE const Vector3 Point3::operator-(const Point3 &pnt) const
1317 {
1318 	return Vector3(_mm_sub_ps(mVec128, pnt.mVec128));
1319 }
1320 
1321 VECTORMATH_FORCE_INLINE const Point3 Point3::operator+(const Vector3 &vec) const
1322 {
1323 	return Point3(_mm_add_ps(mVec128, vec.get128()));
1324 }
1325 
1326 VECTORMATH_FORCE_INLINE const Point3 Point3::operator-(const Vector3 &vec) const
1327 {
1328 	return Point3(_mm_sub_ps(mVec128, vec.get128()));
1329 }
1330 
1331 VECTORMATH_FORCE_INLINE Point3 &Point3::operator+=(const Vector3 &vec)
1332 {
1333 	*this = *this + vec;
1334 	return *this;
1335 }
1336 
1337 VECTORMATH_FORCE_INLINE Point3 &Point3::operator-=(const Vector3 &vec)
1338 {
1339 	*this = *this - vec;
1340 	return *this;
1341 }
1342 
mulPerElem(const Point3 & pnt0,const Point3 & pnt1)1343 VECTORMATH_FORCE_INLINE const Point3 mulPerElem(const Point3 &pnt0, const Point3 &pnt1)
1344 {
1345 	return Point3(_mm_mul_ps(pnt0.get128(), pnt1.get128()));
1346 }
1347 
divPerElem(const Point3 & pnt0,const Point3 & pnt1)1348 VECTORMATH_FORCE_INLINE const Point3 divPerElem(const Point3 &pnt0, const Point3 &pnt1)
1349 {
1350 	return Point3(_mm_div_ps(pnt0.get128(), pnt1.get128()));
1351 }
1352 
recipPerElem(const Point3 & pnt)1353 VECTORMATH_FORCE_INLINE const Point3 recipPerElem(const Point3 &pnt)
1354 {
1355 	return Point3(_mm_rcp_ps(pnt.get128()));
1356 }
1357 
absPerElem(const Point3 & pnt)1358 VECTORMATH_FORCE_INLINE const Point3 absPerElem(const Point3 &pnt)
1359 {
1360 	return Point3(fabsf4(pnt.get128()));
1361 }
1362 
copySignPerElem(const Point3 & pnt0,const Point3 & pnt1)1363 VECTORMATH_FORCE_INLINE const Point3 copySignPerElem(const Point3 &pnt0, const Point3 &pnt1)
1364 {
1365 	__m128 vmask = toM128(0x7fffffff);
1366 	return Point3(_mm_or_ps(
1367 		_mm_and_ps(vmask, pnt0.get128()),       // Value
1368 		_mm_andnot_ps(vmask, pnt1.get128())));  // Signs
1369 }
1370 
maxPerElem(const Point3 & pnt0,const Point3 & pnt1)1371 VECTORMATH_FORCE_INLINE const Point3 maxPerElem(const Point3 &pnt0, const Point3 &pnt1)
1372 {
1373 	return Point3(_mm_max_ps(pnt0.get128(), pnt1.get128()));
1374 }
1375 
maxElem(const Point3 & pnt)1376 VECTORMATH_FORCE_INLINE const floatInVec maxElem(const Point3 &pnt)
1377 {
1378 	return floatInVec(_mm_max_ps(_mm_max_ps(vec_splat(pnt.get128(), 0), vec_splat(pnt.get128(), 1)), vec_splat(pnt.get128(), 2)));
1379 }
1380 
minPerElem(const Point3 & pnt0,const Point3 & pnt1)1381 VECTORMATH_FORCE_INLINE const Point3 minPerElem(const Point3 &pnt0, const Point3 &pnt1)
1382 {
1383 	return Point3(_mm_min_ps(pnt0.get128(), pnt1.get128()));
1384 }
1385 
minElem(const Point3 & pnt)1386 VECTORMATH_FORCE_INLINE const floatInVec minElem(const Point3 &pnt)
1387 {
1388 	return floatInVec(_mm_min_ps(_mm_min_ps(vec_splat(pnt.get128(), 0), vec_splat(pnt.get128(), 1)), vec_splat(pnt.get128(), 2)));
1389 }
1390 
sum(const Point3 & pnt)1391 VECTORMATH_FORCE_INLINE const floatInVec sum(const Point3 &pnt)
1392 {
1393 	return floatInVec(_mm_add_ps(_mm_add_ps(vec_splat(pnt.get128(), 0), vec_splat(pnt.get128(), 1)), vec_splat(pnt.get128(), 2)));
1394 }
1395 
scale(const Point3 & pnt,float scaleVal)1396 VECTORMATH_FORCE_INLINE const Point3 scale(const Point3 &pnt, float scaleVal)
1397 {
1398 	return scale(pnt, floatInVec(scaleVal));
1399 }
1400 
scale(const Point3 & pnt,const floatInVec & scaleVal)1401 VECTORMATH_FORCE_INLINE const Point3 scale(const Point3 &pnt, const floatInVec &scaleVal)
1402 {
1403 	return mulPerElem(pnt, Point3(scaleVal));
1404 }
1405 
scale(const Point3 & pnt,const Vector3 & scaleVec)1406 VECTORMATH_FORCE_INLINE const Point3 scale(const Point3 &pnt, const Vector3 &scaleVec)
1407 {
1408 	return mulPerElem(pnt, Point3(scaleVec));
1409 }
1410 
projection(const Point3 & pnt,const Vector3 & unitVec)1411 VECTORMATH_FORCE_INLINE const floatInVec projection(const Point3 &pnt, const Vector3 &unitVec)
1412 {
1413 	return floatInVec(_vmathVfDot3(pnt.get128(), unitVec.get128()), 0);
1414 }
1415 
distSqrFromOrigin(const Point3 & pnt)1416 VECTORMATH_FORCE_INLINE const floatInVec distSqrFromOrigin(const Point3 &pnt)
1417 {
1418 	return lengthSqr(Vector3(pnt));
1419 }
1420 
distFromOrigin(const Point3 & pnt)1421 VECTORMATH_FORCE_INLINE const floatInVec distFromOrigin(const Point3 &pnt)
1422 {
1423 	return length(Vector3(pnt));
1424 }
1425 
distSqr(const Point3 & pnt0,const Point3 & pnt1)1426 VECTORMATH_FORCE_INLINE const floatInVec distSqr(const Point3 &pnt0, const Point3 &pnt1)
1427 {
1428 	return lengthSqr((pnt1 - pnt0));
1429 }
1430 
dist(const Point3 & pnt0,const Point3 & pnt1)1431 VECTORMATH_FORCE_INLINE const floatInVec dist(const Point3 &pnt0, const Point3 &pnt1)
1432 {
1433 	return length((pnt1 - pnt0));
1434 }
1435 
select(const Point3 & pnt0,const Point3 & pnt1,bool select1)1436 VECTORMATH_FORCE_INLINE const Point3 select(const Point3 &pnt0, const Point3 &pnt1, bool select1)
1437 {
1438 	return select(pnt0, pnt1, boolInVec(select1));
1439 }
1440 
select(const Point3 & pnt0,const Point3 & pnt1,const boolInVec & select1)1441 VECTORMATH_FORCE_INLINE const Point3 select(const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1)
1442 {
1443 	return Point3(vec_sel(pnt0.get128(), pnt1.get128(), select1.get128()));
1444 }
1445 
1446 #ifdef _VECTORMATH_DEBUG
1447 
print(const Point3 & pnt)1448 VECTORMATH_FORCE_INLINE void print(const Point3 &pnt)
1449 {
1450 	union {
1451 		__m128 v;
1452 		float s[4];
1453 	} tmp;
1454 	tmp.v = pnt.get128();
1455 	printf("( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2]);
1456 }
1457 
print(const Point3 & pnt,const char * name)1458 VECTORMATH_FORCE_INLINE void print(const Point3 &pnt, const char *name)
1459 {
1460 	union {
1461 		__m128 v;
1462 		float s[4];
1463 	} tmp;
1464 	tmp.v = pnt.get128();
1465 	printf("%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2]);
1466 }
1467 
1468 #endif
1469 
1470 }  // namespace Aos
1471 }  // namespace Vectormath
1472 
1473 #endif
1474