1 /*
2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
3 
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9 
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 
15 // Modified by Yao Wei Tjong for Urho3D
16 
17 
18 #ifndef BT_VECTOR3_H
19 #define BT_VECTOR3_H
20 
21 //#include <stdint.h>
22 #include "btScalar.h"
23 #include "btMinMax.h"
24 #include "btAlignedAllocator.h"
25 
26 #ifdef BT_USE_DOUBLE_PRECISION
27 #define btVector3Data btVector3DoubleData
28 #define btVector3DataName "btVector3DoubleData"
29 #else
30 #define btVector3Data btVector3FloatData
31 #define btVector3DataName "btVector3FloatData"
32 #endif //BT_USE_DOUBLE_PRECISION
33 
34 #if defined BT_USE_SSE
35 
36 //typedef  uint32_t __m128i __attribute__ ((vector_size(16)));
37 
38 #ifdef _MSC_VER
39 #pragma warning(disable: 4556) // value of intrinsic immediate argument '4294967239' is out of range '0 - 255'
40 #endif
41 
42 
43 #define BT_SHUFFLE(x,y,z,w) (((w)<<6 | (z)<<4 | (y)<<2 | (x)) & 0xff)
44 //#define bt_pshufd_ps( _a, _mask ) (__m128) _mm_shuffle_epi32((__m128i)(_a), (_mask) )
45 #define bt_pshufd_ps( _a, _mask ) _mm_shuffle_ps((_a), (_a), (_mask) )
46 #define bt_splat3_ps( _a, _i ) bt_pshufd_ps((_a), BT_SHUFFLE(_i,_i,_i, 3) )
47 #define bt_splat_ps( _a, _i )  bt_pshufd_ps((_a), BT_SHUFFLE(_i,_i,_i,_i) )
48 
49 #define btv3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
50 #define btvAbsMask (_mm_set_epi32( 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
51 #define btvFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
52 #define btv3AbsfMask btCastiTo128f(btv3AbsiMask)
53 #define btvFFF0fMask btCastiTo128f(btvFFF0Mask)
54 #define btvxyzMaskf btvFFF0fMask
55 #define btvAbsfMask btCastiTo128f(btvAbsMask)
56 
57 //there is an issue with XCode 3.2 (LCx errors)
58 #define btvMzeroMask (_mm_set_ps(-0.0f, -0.0f, -0.0f, -0.0f))
59 #define v1110		 (_mm_set_ps(0.0f, 1.0f, 1.0f, 1.0f))
60 #define vHalf		 (_mm_set_ps(0.5f, 0.5f, 0.5f, 0.5f))
61 #define v1_5		 (_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f))
62 
63 //const __m128 ATTRIBUTE_ALIGNED16(btvMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
64 //const __m128 ATTRIBUTE_ALIGNED16(v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
65 //const __m128 ATTRIBUTE_ALIGNED16(vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
66 //const __m128 ATTRIBUTE_ALIGNED16(v1_5)  = {1.5f, 1.5f, 1.5f, 1.5f};
67 
68 #endif
69 
70 #ifdef BT_USE_NEON
71 
ATTRIBUTE_ALIGNED16(btvMzeroMask)72 const float32x4_t ATTRIBUTE_ALIGNED16(btvMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
ATTRIBUTE_ALIGNED16(btvFFF0Mask)73 const int32x4_t ATTRIBUTE_ALIGNED16(btvFFF0Mask) = (int32x4_t){static_cast<int32_t>(0xFFFFFFFF),
74 	static_cast<int32_t>(0xFFFFFFFF), static_cast<int32_t>(0xFFFFFFFF), 0x0};
ATTRIBUTE_ALIGNED16(btvAbsMask)75 const int32x4_t ATTRIBUTE_ALIGNED16(btvAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
ATTRIBUTE_ALIGNED16(btv3AbsMask)76 const int32x4_t ATTRIBUTE_ALIGNED16(btv3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};
77 
78 #endif
79 
80 /**@brief btVector3 can be used to represent 3D points and vectors.
81  * It has an un-used w component to suit 16-byte alignment when btVector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by user
82  * Ideally, this class should be replaced by a platform optimized SIMD version that keeps the data in registers
83  */
ATTRIBUTE_ALIGNED16(class)84 ATTRIBUTE_ALIGNED16(class) btVector3
85 {
86 public:
87 
88 	BT_DECLARE_ALIGNED_ALLOCATOR();
89 
90 #if defined (__SPU__) && defined (__CELLOS_LV2__)
91 		btScalar	m_floats[4];
92 public:
93 	SIMD_FORCE_INLINE const vec_float4&	get128() const
94 	{
95 		return *((const vec_float4*)&m_floats[0]);
96 	}
97 public:
98 #else //__CELLOS_LV2__ __SPU__
99     #if defined (BT_USE_SSE) || defined(BT_USE_NEON) // _WIN32 || ARM
100         union {
101             btSimdFloat4      mVec128;
102             btScalar	m_floats[4];
103         };
104         SIMD_FORCE_INLINE	btSimdFloat4	get128() const
105         {
106             return mVec128;
107         }
108         SIMD_FORCE_INLINE	void	set128(btSimdFloat4 v128)
109         {
110             mVec128 = v128;
111         }
112     #else
113         btScalar	m_floats[4];
114     #endif
115 #endif //__CELLOS_LV2__ __SPU__
116 
117 	public:
118 
119   /**@brief No initialization constructor */
120 	SIMD_FORCE_INLINE btVector3()
121 	{
122 
123 	}
124 
125 
126 
127   /**@brief Constructor from scalars
128    * @param x X value
129    * @param y Y value
130    * @param z Z value
131    */
132 	SIMD_FORCE_INLINE btVector3(const btScalar& _x, const btScalar& _y, const btScalar& _z)
133 	{
134 		m_floats[0] = _x;
135 		m_floats[1] = _y;
136 		m_floats[2] = _z;
137 		m_floats[3] = btScalar(0.f);
138 	}
139 
140 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) )|| defined (BT_USE_NEON)
141 	// Set Vector
142 	SIMD_FORCE_INLINE btVector3( btSimdFloat4 v)
143 	{
144 		mVec128 = v;
145 	}
146 
147 	// Copy constructor
148 	SIMD_FORCE_INLINE btVector3(const btVector3& rhs)
149 	{
150 		mVec128 = rhs.mVec128;
151 	}
152 
153 	// Assignment Operator
154 	SIMD_FORCE_INLINE btVector3&
155 	operator=(const btVector3& v)
156 	{
157 		mVec128 = v.mVec128;
158 
159 		return *this;
160 	}
161 #endif // #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
162 
163 /**@brief Add a vector to this one
164  * @param The vector to add to this one */
165 	SIMD_FORCE_INLINE btVector3& operator+=(const btVector3& v)
166 	{
167 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
168 		mVec128 = _mm_add_ps(mVec128, v.mVec128);
169 #elif defined(BT_USE_NEON)
170 		mVec128 = vaddq_f32(mVec128, v.mVec128);
171 #else
172 		m_floats[0] += v.m_floats[0];
173 		m_floats[1] += v.m_floats[1];
174 		m_floats[2] += v.m_floats[2];
175 #endif
176 		return *this;
177 	}
178 
179 
180   /**@brief Subtract a vector from this one
181    * @param The vector to subtract */
182 	SIMD_FORCE_INLINE btVector3& operator-=(const btVector3& v)
183 	{
184 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
185 		mVec128 = _mm_sub_ps(mVec128, v.mVec128);
186 #elif defined(BT_USE_NEON)
187 		mVec128 = vsubq_f32(mVec128, v.mVec128);
188 #else
189 		m_floats[0] -= v.m_floats[0];
190 		m_floats[1] -= v.m_floats[1];
191 		m_floats[2] -= v.m_floats[2];
192 #endif
193 		return *this;
194 	}
195 
196   /**@brief Scale the vector
197    * @param s Scale factor */
198 	SIMD_FORCE_INLINE btVector3& operator*=(const btScalar& s)
199 	{
200 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
201 		__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
202 		vs = bt_pshufd_ps(vs, 0x80);	//	(S S S 0.0)
203 		mVec128 = _mm_mul_ps(mVec128, vs);
204 #elif defined(BT_USE_NEON)
205 		mVec128 = vmulq_n_f32(mVec128, s);
206 #else
207 		m_floats[0] *= s;
208 		m_floats[1] *= s;
209 		m_floats[2] *= s;
210 #endif
211 		return *this;
212 	}
213 
214   /**@brief Inversely scale the vector
215    * @param s Scale factor to divide by */
216 	SIMD_FORCE_INLINE btVector3& operator/=(const btScalar& s)
217 	{
218 		btFullAssert(s != btScalar(0.0));
219 
220 #if 0 //defined(BT_USE_SSE_IN_API)
221 // this code is not faster !
222 		__m128 vs = _mm_load_ss(&s);
223 		vs = _mm_div_ss(v1110, vs);
224 		vs = bt_pshufd_ps(vs, 0x00);	//	(S S S S)
225 
226 		mVec128 = _mm_mul_ps(mVec128, vs);
227 
228 		return *this;
229 #else
230 		return *this *= btScalar(1.0) / s;
231 #endif
232 	}
233 
234   /**@brief Return the dot product
235    * @param v The other vector in the dot product */
236 	SIMD_FORCE_INLINE btScalar dot(const btVector3& v) const
237 	{
238 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
239 		__m128 vd = _mm_mul_ps(mVec128, v.mVec128);
240 		__m128 z = _mm_movehl_ps(vd, vd);
241 		__m128 y = _mm_shuffle_ps(vd, vd, 0x55);
242 		vd = _mm_add_ss(vd, y);
243 		vd = _mm_add_ss(vd, z);
244 		return _mm_cvtss_f32(vd);
245 #elif defined(BT_USE_NEON)
246 		float32x4_t vd = vmulq_f32(mVec128, v.mVec128);
247 		float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd));
248 		x = vadd_f32(x, vget_high_f32(vd));
249 		return vget_lane_f32(x, 0);
250 #else
251 		return	m_floats[0] * v.m_floats[0] +
252 				m_floats[1] * v.m_floats[1] +
253 				m_floats[2] * v.m_floats[2];
254 #endif
255 	}
256 
257   /**@brief Return the length of the vector squared */
258 	SIMD_FORCE_INLINE btScalar length2() const
259 	{
260 		return dot(*this);
261 	}
262 
263   /**@brief Return the length of the vector */
264 	SIMD_FORCE_INLINE btScalar length() const
265 	{
266 		return btSqrt(length2());
267 	}
268 
269 	/**@brief Return the norm (length) of the vector */
270 	SIMD_FORCE_INLINE btScalar norm() const
271 	{
272 		return length();
273 	}
274 
275 	/**@brief Return the norm (length) of the vector */
276 	SIMD_FORCE_INLINE btScalar safeNorm() const
277 	{
278 		btScalar d = length2();
279 		//workaround for some clang/gcc issue of sqrtf(tiny number) = -INF
280 		if (d>SIMD_EPSILON)
281 			return btSqrt(d);
282 		return btScalar(0);
283 	}
284 
285   /**@brief Return the distance squared between the ends of this and another vector
286    * This is symantically treating the vector like a point */
287 	SIMD_FORCE_INLINE btScalar distance2(const btVector3& v) const;
288 
289   /**@brief Return the distance between the ends of this and another vector
290    * This is symantically treating the vector like a point */
291 	SIMD_FORCE_INLINE btScalar distance(const btVector3& v) const;
292 
293 	SIMD_FORCE_INLINE btVector3& safeNormalize()
294 	{
295 		btScalar l2 = length2();
296 		//triNormal.normalize();
297 		if (l2 >= SIMD_EPSILON*SIMD_EPSILON)
298 		{
299 			(*this) /= btSqrt(l2);
300 		}
301 		else
302 		{
303 			setValue(1, 0, 0);
304 		}
305 		return *this;
306 	}
307 
308   /**@brief Normalize this vector
309    * x^2 + y^2 + z^2 = 1 */
310 	SIMD_FORCE_INLINE btVector3& normalize()
311 	{
312 
313 		btAssert(!fuzzyZero());
314 
315 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
316         // dot product first
317 		__m128 vd = _mm_mul_ps(mVec128, mVec128);
318 		__m128 z = _mm_movehl_ps(vd, vd);
319 		__m128 y = _mm_shuffle_ps(vd, vd, 0x55);
320 		vd = _mm_add_ss(vd, y);
321 		vd = _mm_add_ss(vd, z);
322 
323         #if 0
324         vd = _mm_sqrt_ss(vd);
325 		vd = _mm_div_ss(v1110, vd);
326 		vd = bt_splat_ps(vd, 0x80);
327 		mVec128 = _mm_mul_ps(mVec128, vd);
328         #else
329 
330         // NR step 1/sqrt(x) - vd is x, y is output
331         y = _mm_rsqrt_ss(vd); // estimate
332 
333         //  one step NR
334         z = v1_5;
335         vd = _mm_mul_ss(vd, vHalf); // vd * 0.5
336         //x2 = vd;
337         vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0
338         vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0
339         z = _mm_sub_ss(z, vd);  // 1.5 - vd * 0.5 * y0 * y0
340 
341         y = _mm_mul_ss(y, z);   // y0 * (1.5 - vd * 0.5 * y0 * y0)
342 
343 		y = bt_splat_ps(y, 0x80);
344 		mVec128 = _mm_mul_ps(mVec128, y);
345 
346         #endif
347 
348 
349 		return *this;
350 #else
351 		return *this /= length();
352 #endif
353 	}
354 
355   /**@brief Return a normalized version of this vector */
356 	SIMD_FORCE_INLINE btVector3 normalized() const;
357 
358   /**@brief Return a rotated version of this vector
359    * @param wAxis The axis to rotate about
360    * @param angle The angle to rotate by */
361 	SIMD_FORCE_INLINE btVector3 rotate( const btVector3& wAxis, const btScalar angle ) const;
362 
363   /**@brief Return the angle between this and another vector
364    * @param v The other vector */
365 	SIMD_FORCE_INLINE btScalar angle(const btVector3& v) const
366 	{
367 		btScalar s = btSqrt(length2() * v.length2());
368 		btFullAssert(s != btScalar(0.0));
369 		return btAcos(dot(v) / s);
370 	}
371 
372   /**@brief Return a vector will the absolute values of each element */
373 	SIMD_FORCE_INLINE btVector3 absolute() const
374 	{
375 
376 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
377 		return btVector3(_mm_and_ps(mVec128, btv3AbsfMask));
378 #elif defined(BT_USE_NEON)
379 		return btVector3(vabsq_f32(mVec128));
380 #else
381 		return btVector3(
382 			btFabs(m_floats[0]),
383 			btFabs(m_floats[1]),
384 			btFabs(m_floats[2]));
385 #endif
386 	}
387 
388   /**@brief Return the cross product between this and another vector
389    * @param v The other vector */
390 	SIMD_FORCE_INLINE btVector3 cross(const btVector3& v) const
391 	{
392 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
393 		__m128	T, V;
394 
395 		T = bt_pshufd_ps(mVec128, BT_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
396 		V = bt_pshufd_ps(v.mVec128, BT_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
397 
398 		V = _mm_mul_ps(V, mVec128);
399 		T = _mm_mul_ps(T, v.mVec128);
400 		V = _mm_sub_ps(V, T);
401 
402 		V = bt_pshufd_ps(V, BT_SHUFFLE(1, 2, 0, 3));
403 		return btVector3(V);
404 #elif defined(BT_USE_NEON)
405 		float32x4_t T, V;
406 		// form (Y, Z, X, _) of mVec128 and v.mVec128
407 		float32x2_t Tlow = vget_low_f32(mVec128);
408 		float32x2_t Vlow = vget_low_f32(v.mVec128);
409 		T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow);
410 		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow);
411 
412 		V = vmulq_f32(V, mVec128);
413 		T = vmulq_f32(T, v.mVec128);
414 		V = vsubq_f32(V, T);
415 		Vlow = vget_low_f32(V);
416 		// form (Y, Z, X, _);
417 		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
418 		V = (float32x4_t)vandq_s32((int32x4_t)V, btvFFF0Mask);
419 
420 		return btVector3(V);
421 #else
422 		return btVector3(
423 			m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1],
424 			m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2],
425 			m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]);
426 #endif
427 	}
428 
429 	SIMD_FORCE_INLINE btScalar triple(const btVector3& v1, const btVector3& v2) const
430 	{
431 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
432 		// cross:
433 		__m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, BT_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
434 		__m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, BT_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
435 
436 		V = _mm_mul_ps(V, v1.mVec128);
437 		T = _mm_mul_ps(T, v2.mVec128);
438 		V = _mm_sub_ps(V, T);
439 
440 		V = _mm_shuffle_ps(V, V, BT_SHUFFLE(1, 2, 0, 3));
441 
442 		// dot:
443 		V = _mm_mul_ps(V, mVec128);
444 		__m128 z = _mm_movehl_ps(V, V);
445 		__m128 y = _mm_shuffle_ps(V, V, 0x55);
446 		V = _mm_add_ss(V, y);
447 		V = _mm_add_ss(V, z);
448 		return _mm_cvtss_f32(V);
449 
450 #elif defined(BT_USE_NEON)
451 		// cross:
452 		float32x4_t T, V;
453 		// form (Y, Z, X, _) of mVec128 and v.mVec128
454 		float32x2_t Tlow = vget_low_f32(v1.mVec128);
455 		float32x2_t Vlow = vget_low_f32(v2.mVec128);
456 		T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow);
457 		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow);
458 
459 		V = vmulq_f32(V, v1.mVec128);
460 		T = vmulq_f32(T, v2.mVec128);
461 		V = vsubq_f32(V, T);
462 		Vlow = vget_low_f32(V);
463 		// form (Y, Z, X, _);
464 		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
465 
466 		// dot:
467 		V = vmulq_f32(mVec128, V);
468 		float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V));
469 		x = vadd_f32(x, vget_high_f32(V));
470 		return vget_lane_f32(x, 0);
471 #else
472 		return
473 			m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) +
474 			m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) +
475 			m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
476 #endif
477 	}
478 
479   /**@brief Return the axis with the smallest value
480    * Note return values are 0,1,2 for x, y, or z */
481 	SIMD_FORCE_INLINE int minAxis() const
482 	{
483 		return m_floats[0] < m_floats[1] ? (m_floats[0] <m_floats[2] ? 0 : 2) : (m_floats[1] <m_floats[2] ? 1 : 2);
484 	}
485 
486   /**@brief Return the axis with the largest value
487    * Note return values are 0,1,2 for x, y, or z */
488 	SIMD_FORCE_INLINE int maxAxis() const
489 	{
490 		return m_floats[0] < m_floats[1] ? (m_floats[1] <m_floats[2] ? 2 : 1) : (m_floats[0] <m_floats[2] ? 2 : 0);
491 	}
492 
493 	SIMD_FORCE_INLINE int furthestAxis() const
494 	{
495 		return absolute().minAxis();
496 	}
497 
498 	SIMD_FORCE_INLINE int closestAxis() const
499 	{
500 		return absolute().maxAxis();
501 	}
502 
503 
504 	SIMD_FORCE_INLINE void setInterpolate3(const btVector3& v0, const btVector3& v1, btScalar rt)
505 	{
506 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
507 		__m128	vrt = _mm_load_ss(&rt);	//	(rt 0 0 0)
508 		btScalar s = btScalar(1.0) - rt;
509 		__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
510 		vs = bt_pshufd_ps(vs, 0x80);	//	(S S S 0.0)
511 		__m128 r0 = _mm_mul_ps(v0.mVec128, vs);
512 		vrt = bt_pshufd_ps(vrt, 0x80);	//	(rt rt rt 0.0)
513 		__m128 r1 = _mm_mul_ps(v1.mVec128, vrt);
514 		__m128 tmp3 = _mm_add_ps(r0,r1);
515 		mVec128 = tmp3;
516 #elif defined(BT_USE_NEON)
517 		float32x4_t vl = vsubq_f32(v1.mVec128, v0.mVec128);
518 		vl = vmulq_n_f32(vl, rt);
519 		mVec128 = vaddq_f32(vl, v0.mVec128);
520 #else
521 		btScalar s = btScalar(1.0) - rt;
522 		m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0];
523 		m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1];
524 		m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2];
525 		//don't do the unused w component
526 		//		m_co[3] = s * v0[3] + rt * v1[3];
527 #endif
528 	}
529 
530   /**@brief Return the linear interpolation between this and another vector
531    * @param v The other vector
532    * @param t The ration of this to v (t = 0 => return this, t=1 => return other) */
533 	SIMD_FORCE_INLINE btVector3 lerp(const btVector3& v, const btScalar& t) const
534 	{
535 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
536 		__m128	vt = _mm_load_ss(&t);	//	(t 0 0 0)
537 		vt = bt_pshufd_ps(vt, 0x80);	//	(rt rt rt 0.0)
538 		__m128 vl = _mm_sub_ps(v.mVec128, mVec128);
539 		vl = _mm_mul_ps(vl, vt);
540 		vl = _mm_add_ps(vl, mVec128);
541 
542 		return btVector3(vl);
543 #elif defined(BT_USE_NEON)
544 		float32x4_t vl = vsubq_f32(v.mVec128, mVec128);
545 		vl = vmulq_n_f32(vl, t);
546 		vl = vaddq_f32(vl, mVec128);
547 
548 		return btVector3(vl);
549 #else
550 		return
551 			btVector3(	m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
552 						m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
553 						m_floats[2] + (v.m_floats[2] - m_floats[2]) * t);
554 #endif
555 	}
556 
557   /**@brief Elementwise multiply this vector by the other
558    * @param v The other vector */
559 	SIMD_FORCE_INLINE btVector3& operator*=(const btVector3& v)
560 	{
561 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
562 		mVec128 = _mm_mul_ps(mVec128, v.mVec128);
563 #elif defined(BT_USE_NEON)
564 		mVec128 = vmulq_f32(mVec128, v.mVec128);
565 #else
566 		m_floats[0] *= v.m_floats[0];
567 		m_floats[1] *= v.m_floats[1];
568 		m_floats[2] *= v.m_floats[2];
569 #endif
570 		return *this;
571 	}
572 
573 	 /**@brief Return the x value */
574 		SIMD_FORCE_INLINE const btScalar& getX() const { return m_floats[0]; }
575   /**@brief Return the y value */
576 		SIMD_FORCE_INLINE const btScalar& getY() const { return m_floats[1]; }
577   /**@brief Return the z value */
578 		SIMD_FORCE_INLINE const btScalar& getZ() const { return m_floats[2]; }
579   /**@brief Set the x value */
580 		SIMD_FORCE_INLINE void	setX(btScalar _x) { m_floats[0] = _x;};
581   /**@brief Set the y value */
582 		SIMD_FORCE_INLINE void	setY(btScalar _y) { m_floats[1] = _y;};
583   /**@brief Set the z value */
584 		SIMD_FORCE_INLINE void	setZ(btScalar _z) { m_floats[2] = _z;};
585   /**@brief Set the w value */
586 		SIMD_FORCE_INLINE void	setW(btScalar _w) { m_floats[3] = _w;};
587   /**@brief Return the x value */
588 		SIMD_FORCE_INLINE const btScalar& x() const { return m_floats[0]; }
589   /**@brief Return the y value */
590 		SIMD_FORCE_INLINE const btScalar& y() const { return m_floats[1]; }
591   /**@brief Return the z value */
592 		SIMD_FORCE_INLINE const btScalar& z() const { return m_floats[2]; }
593   /**@brief Return the w value */
594 		SIMD_FORCE_INLINE const btScalar& w() const { return m_floats[3]; }
595 
596 	//SIMD_FORCE_INLINE btScalar&       operator[](int i)       { return (&m_floats[0])[i];	}
597 	//SIMD_FORCE_INLINE const btScalar& operator[](int i) const { return (&m_floats[0])[i]; }
598 	///operator btScalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons.
599 	SIMD_FORCE_INLINE	operator       btScalar *()       { return &m_floats[0]; }
600 	SIMD_FORCE_INLINE	operator const btScalar *() const { return &m_floats[0]; }
601 
602 	SIMD_FORCE_INLINE	bool	operator==(const btVector3& other) const
603 	{
604 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
605         return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
606 #else
607 		return ((m_floats[3]==other.m_floats[3]) &&
608                 (m_floats[2]==other.m_floats[2]) &&
609                 (m_floats[1]==other.m_floats[1]) &&
610                 (m_floats[0]==other.m_floats[0]));
611 #endif
612 	}
613 
614 	SIMD_FORCE_INLINE	bool	operator!=(const btVector3& other) const
615 	{
616 		return !(*this == other);
617 	}
618 
619   /**@brief Set each element to the max of the current values and the values of another btVector3
620    * @param other The other btVector3 to compare with
621    */
622 	SIMD_FORCE_INLINE void	setMax(const btVector3& other)
623 	{
624 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
625 		mVec128 = _mm_max_ps(mVec128, other.mVec128);
626 #elif defined(BT_USE_NEON)
627 		mVec128 = vmaxq_f32(mVec128, other.mVec128);
628 #else
629 		btSetMax(m_floats[0], other.m_floats[0]);
630 		btSetMax(m_floats[1], other.m_floats[1]);
631 		btSetMax(m_floats[2], other.m_floats[2]);
632 		btSetMax(m_floats[3], other.w());
633 #endif
634 	}
635 
636   /**@brief Set each element to the min of the current values and the values of another btVector3
637    * @param other The other btVector3 to compare with
638    */
639 	SIMD_FORCE_INLINE void	setMin(const btVector3& other)
640 	{
641 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
642 		mVec128 = _mm_min_ps(mVec128, other.mVec128);
643 #elif defined(BT_USE_NEON)
644 		mVec128 = vminq_f32(mVec128, other.mVec128);
645 #else
646 		btSetMin(m_floats[0], other.m_floats[0]);
647 		btSetMin(m_floats[1], other.m_floats[1]);
648 		btSetMin(m_floats[2], other.m_floats[2]);
649 		btSetMin(m_floats[3], other.w());
650 #endif
651 	}
652 
653 	SIMD_FORCE_INLINE void 	setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z)
654 	{
655 		m_floats[0]=_x;
656 		m_floats[1]=_y;
657 		m_floats[2]=_z;
658 		m_floats[3] = btScalar(0.f);
659 	}
660 
661 	void	getSkewSymmetricMatrix(btVector3* v0,btVector3* v1,btVector3* v2) const
662 	{
663 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
664 
665 		__m128 V  = _mm_and_ps(mVec128, btvFFF0fMask);
666 		__m128 V0 = _mm_xor_ps(btvMzeroMask, V);
667 		__m128 V2 = _mm_movelh_ps(V0, V);
668 
669 		__m128 V1 = _mm_shuffle_ps(V, V0, 0xCE);
670 
671         V0 = _mm_shuffle_ps(V0, V, 0xDB);
672 		V2 = _mm_shuffle_ps(V2, V, 0xF9);
673 
674 		v0->mVec128 = V0;
675 		v1->mVec128 = V1;
676 		v2->mVec128 = V2;
677 #else
678 		v0->setValue(0.		,-z()		,y());
679 		v1->setValue(z()	,0.			,-x());
680 		v2->setValue(-y()	,x()	,0.);
681 #endif
682 	}
683 
684 	void setZero()
685 	{
686 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
687 		mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128);
688 #elif defined(BT_USE_NEON)
689 		int32x4_t vi = vdupq_n_s32(0);
690 		mVec128 = vreinterpretq_f32_s32(vi);
691 #else
692 		setValue(btScalar(0.),btScalar(0.),btScalar(0.));
693 #endif
694 	}
695 
696 	SIMD_FORCE_INLINE bool isZero() const
697 	{
698 		return m_floats[0] == btScalar(0) && m_floats[1] == btScalar(0) && m_floats[2] == btScalar(0);
699 	}
700 
701 
702 	SIMD_FORCE_INLINE bool fuzzyZero() const
703 	{
704 		return length2() < SIMD_EPSILON*SIMD_EPSILON;
705 	}
706 
707 	SIMD_FORCE_INLINE	void	serialize(struct	btVector3Data& dataOut) const;
708 
709 	SIMD_FORCE_INLINE	void	deSerialize(const struct	btVector3Data& dataIn);
710 
711 	SIMD_FORCE_INLINE	void	serializeFloat(struct	btVector3FloatData& dataOut) const;
712 
713 	SIMD_FORCE_INLINE	void	deSerializeFloat(const struct	btVector3FloatData& dataIn);
714 
715 	SIMD_FORCE_INLINE	void	serializeDouble(struct	btVector3DoubleData& dataOut) const;
716 
717 	SIMD_FORCE_INLINE	void	deSerializeDouble(const struct	btVector3DoubleData& dataIn);
718 
719         /**@brief returns index of maximum dot product between this and vectors in array[]
720          * @param array The other vectors
721          * @param array_count The number of other vectors
722          * @param dotOut The maximum dot product */
723         SIMD_FORCE_INLINE   long    maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const;
724 
725         /**@brief returns index of minimum dot product between this and vectors in array[]
726          * @param array The other vectors
727          * @param array_count The number of other vectors
728          * @param dotOut The minimum dot product */
729         SIMD_FORCE_INLINE   long    minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const;
730 
731     /* create a vector as  btVector3( this->dot( btVector3 v0 ), this->dot( btVector3 v1), this->dot( btVector3 v2 ))  */
732     SIMD_FORCE_INLINE btVector3  dot3( const btVector3 &v0, const btVector3 &v1, const btVector3 &v2 ) const
733     {
734 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
735 
736         __m128 a0 = _mm_mul_ps( v0.mVec128, this->mVec128 );
737         __m128 a1 = _mm_mul_ps( v1.mVec128, this->mVec128 );
738         __m128 a2 = _mm_mul_ps( v2.mVec128, this->mVec128 );
739         __m128 b0 = _mm_unpacklo_ps( a0, a1 );
740         __m128 b1 = _mm_unpackhi_ps( a0, a1 );
741         __m128 b2 = _mm_unpacklo_ps( a2, _mm_setzero_ps() );
742         __m128 r = _mm_movelh_ps( b0, b2 );
743         r = _mm_add_ps( r, _mm_movehl_ps( b2, b0 ));
744         a2 = _mm_and_ps( a2, btvxyzMaskf);
745         r = _mm_add_ps( r, btCastdTo128f (_mm_move_sd( btCastfTo128d(a2), btCastfTo128d(b1) )));
746         return btVector3(r);
747 
748 #elif defined(BT_USE_NEON)
749         static const uint32x4_t xyzMask = (const uint32x4_t){ static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0 };
750         float32x4_t a0 = vmulq_f32( v0.mVec128, this->mVec128);
751         float32x4_t a1 = vmulq_f32( v1.mVec128, this->mVec128);
752         float32x4_t a2 = vmulq_f32( v2.mVec128, this->mVec128);
753         float32x2x2_t zLo = vtrn_f32( vget_high_f32(a0), vget_high_f32(a1));
754         a2 = (float32x4_t) vandq_u32((uint32x4_t) a2, xyzMask );
755         float32x2_t b0 = vadd_f32( vpadd_f32( vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0] );
756         float32x2_t b1 = vpadd_f32( vpadd_f32( vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f));
757         return btVector3( vcombine_f32(b0, b1) );
758 #else
759 		return btVector3( dot(v0), dot(v1), dot(v2));
760 #endif
761     }
762 };
763 
764 /**@brief Return the sum of two vectors (Point symantics)*/
765 SIMD_FORCE_INLINE btVector3
766 operator+(const btVector3& v1, const btVector3& v2)
767 {
768 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
769 	return btVector3(_mm_add_ps(v1.mVec128, v2.mVec128));
770 #elif defined(BT_USE_NEON)
771 	return btVector3(vaddq_f32(v1.mVec128, v2.mVec128));
772 #else
773 	return btVector3(
774 			v1.m_floats[0] + v2.m_floats[0],
775 			v1.m_floats[1] + v2.m_floats[1],
776 			v1.m_floats[2] + v2.m_floats[2]);
777 #endif
778 }
779 
780 /**@brief Return the elementwise product of two vectors */
781 SIMD_FORCE_INLINE btVector3
782 operator*(const btVector3& v1, const btVector3& v2)
783 {
784 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
785 	return btVector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
786 #elif defined(BT_USE_NEON)
787 	return btVector3(vmulq_f32(v1.mVec128, v2.mVec128));
788 #else
789 	return btVector3(
790 			v1.m_floats[0] * v2.m_floats[0],
791 			v1.m_floats[1] * v2.m_floats[1],
792 			v1.m_floats[2] * v2.m_floats[2]);
793 #endif
794 }
795 
796 /**@brief Return the difference between two vectors */
797 SIMD_FORCE_INLINE btVector3
798 operator-(const btVector3& v1, const btVector3& v2)
799 {
800 #if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API)  && defined(BT_USE_SSE))
801 
802 	//	without _mm_and_ps this code causes slowdown in Concave moving
803 	__m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
804 	return btVector3(_mm_and_ps(r, btvFFF0fMask));
805 #elif defined(BT_USE_NEON)
806 	float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128);
807 	return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
808 #else
809 	return btVector3(
810 			v1.m_floats[0] - v2.m_floats[0],
811 			v1.m_floats[1] - v2.m_floats[1],
812 			v1.m_floats[2] - v2.m_floats[2]);
813 #endif
814 }
815 
816 /**@brief Return the negative of the vector */
817 SIMD_FORCE_INLINE btVector3
818 operator-(const btVector3& v)
819 {
820 #if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
821 	__m128 r = _mm_xor_ps(v.mVec128, btvMzeroMask);
822 	return btVector3(_mm_and_ps(r, btvFFF0fMask));
823 #elif defined(BT_USE_NEON)
824 	return btVector3((btSimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)btvMzeroMask));
825 #else
826 	return btVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]);
827 #endif
828 }
829 
830 /**@brief Return the vector scaled by s */
831 SIMD_FORCE_INLINE btVector3
832 operator*(const btVector3& v, const btScalar& s)
833 {
834 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
835 	__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
836 	vs = bt_pshufd_ps(vs, 0x80);	//	(S S S 0.0)
837 	return btVector3(_mm_mul_ps(v.mVec128, vs));
838 #elif defined(BT_USE_NEON)
839 	float32x4_t r = vmulq_n_f32(v.mVec128, s);
840 	return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
841 #else
842 	return btVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s);
843 #endif
844 }
845 
846 /**@brief Return the vector scaled by s */
847 SIMD_FORCE_INLINE btVector3
848 operator*(const btScalar& s, const btVector3& v)
849 {
850 	return v * s;
851 }
852 
853 /**@brief Return the vector inversely scaled by s */
854 SIMD_FORCE_INLINE btVector3
855 operator/(const btVector3& v, const btScalar& s)
856 {
857 	btFullAssert(s != btScalar(0.0));
858 #if 0 //defined(BT_USE_SSE_IN_API)
859 // this code is not faster !
860 	__m128 vs = _mm_load_ss(&s);
861     vs = _mm_div_ss(v1110, vs);
862 	vs = bt_pshufd_ps(vs, 0x00);	//	(S S S S)
863 
864 	return btVector3(_mm_mul_ps(v.mVec128, vs));
865 #else
866 	return v * (btScalar(1.0) / s);
867 #endif
868 }
869 
870 /**@brief Return the vector inversely scaled by s */
871 SIMD_FORCE_INLINE btVector3
872 operator/(const btVector3& v1, const btVector3& v2)
873 {
874 #if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE))
875 	__m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
876 	vec = _mm_and_ps(vec, btvFFF0fMask);
877 	return btVector3(vec);
878 #elif defined(BT_USE_NEON)
879 	float32x4_t x, y, v, m;
880 
881 	x = v1.mVec128;
882 	y = v2.mVec128;
883 
884 	v = vrecpeq_f32(y);			// v ~ 1/y
885 	m = vrecpsq_f32(y, v);		// m = (2-v*y)
886 	v = vmulq_f32(v, m);		// vv = v*m ~~ 1/y
887 	m = vrecpsq_f32(y, v);		// mm = (2-vv*y)
888 	v = vmulq_f32(v, x);		// x*vv
889 	v = vmulq_f32(v, m);		// (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y
890 
891 	return btVector3(v);
892 #else
893 	return btVector3(
894 			v1.m_floats[0] / v2.m_floats[0],
895 			v1.m_floats[1] / v2.m_floats[1],
896 			v1.m_floats[2] / v2.m_floats[2]);
897 #endif
898 }
899 
900 /**@brief Return the dot product between two vectors */
901 SIMD_FORCE_INLINE btScalar
btDot(const btVector3 & v1,const btVector3 & v2)902 btDot(const btVector3& v1, const btVector3& v2)
903 {
904 	return v1.dot(v2);
905 }
906 
907 
908 /**@brief Return the distance squared between two vectors */
909 SIMD_FORCE_INLINE btScalar
btDistance2(const btVector3 & v1,const btVector3 & v2)910 btDistance2(const btVector3& v1, const btVector3& v2)
911 {
912 	return v1.distance2(v2);
913 }
914 
915 
916 /**@brief Return the distance between two vectors */
917 SIMD_FORCE_INLINE btScalar
btDistance(const btVector3 & v1,const btVector3 & v2)918 btDistance(const btVector3& v1, const btVector3& v2)
919 {
920 	return v1.distance(v2);
921 }
922 
923 /**@brief Return the angle between two vectors */
924 SIMD_FORCE_INLINE btScalar
btAngle(const btVector3 & v1,const btVector3 & v2)925 btAngle(const btVector3& v1, const btVector3& v2)
926 {
927 	return v1.angle(v2);
928 }
929 
930 /**@brief Return the cross product of two vectors */
931 SIMD_FORCE_INLINE btVector3
btCross(const btVector3 & v1,const btVector3 & v2)932 btCross(const btVector3& v1, const btVector3& v2)
933 {
934 	return v1.cross(v2);
935 }
936 
937 SIMD_FORCE_INLINE btScalar
btTriple(const btVector3 & v1,const btVector3 & v2,const btVector3 & v3)938 btTriple(const btVector3& v1, const btVector3& v2, const btVector3& v3)
939 {
940 	return v1.triple(v2, v3);
941 }
942 
943 /**@brief Return the linear interpolation between two vectors
944  * @param v1 One vector
945  * @param v2 The other vector
946  * @param t The ration of this to v (t = 0 => return v1, t=1 => return v2) */
947 SIMD_FORCE_INLINE btVector3
lerp(const btVector3 & v1,const btVector3 & v2,const btScalar & t)948 lerp(const btVector3& v1, const btVector3& v2, const btScalar& t)
949 {
950 	return v1.lerp(v2, t);
951 }
952 
953 
954 
distance2(const btVector3 & v)955 SIMD_FORCE_INLINE btScalar btVector3::distance2(const btVector3& v) const
956 {
957 	return (v - *this).length2();
958 }
959 
distance(const btVector3 & v)960 SIMD_FORCE_INLINE btScalar btVector3::distance(const btVector3& v) const
961 {
962 	return (v - *this).length();
963 }
964 
normalized()965 SIMD_FORCE_INLINE btVector3 btVector3::normalized() const
966 {
967 	btVector3 nrm = *this;
968 
969 	return nrm.normalize();
970 }
971 
rotate(const btVector3 & wAxis,const btScalar _angle)972 SIMD_FORCE_INLINE btVector3 btVector3::rotate( const btVector3& wAxis, const btScalar _angle ) const
973 {
974 	// wAxis must be a unit lenght vector
975 
976 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
977 
978     __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
979 	btScalar ssin = btSin( _angle );
980     __m128 C = wAxis.cross( mVec128 ).mVec128;
981 	O = _mm_and_ps(O, btvFFF0fMask);
982     btScalar scos = btCos( _angle );
983 
984 	__m128 vsin = _mm_load_ss(&ssin);	//	(S 0 0 0)
985     __m128 vcos = _mm_load_ss(&scos);	//	(S 0 0 0)
986 
987 	__m128 Y = bt_pshufd_ps(O, 0xC9);	//	(Y Z X 0)
988 	__m128 Z = bt_pshufd_ps(O, 0xD2);	//	(Z X Y 0)
989 	O = _mm_add_ps(O, Y);
990 	vsin = bt_pshufd_ps(vsin, 0x80);	//	(S S S 0)
991 	O = _mm_add_ps(O, Z);
992     vcos = bt_pshufd_ps(vcos, 0x80);	//	(S S S 0)
993 
994     vsin = vsin * C;
995 	O = O * wAxis.mVec128;
996 	__m128 X = mVec128 - O;
997 
998     O = O + vsin;
999 	vcos = vcos * X;
1000 	O = O + vcos;
1001 
1002 	return btVector3(O);
1003 #else
1004 	btVector3 o = wAxis * wAxis.dot( *this );
1005 	btVector3 _x = *this - o;
1006 	btVector3 _y;
1007 
1008 	_y = wAxis.cross( *this );
1009 
1010 	return ( o + _x * btCos( _angle ) + _y * btSin( _angle ) );
1011 #endif
1012 }
1013 
maxDot(const btVector3 * array,long array_count,btScalar & dotOut)1014 SIMD_FORCE_INLINE   long    btVector3::maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
1015 {
1016 #if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
1017     #ifdef BT_USE_SSE	// Urho3D - to be consistent with the function return below
1018         const long scalar_cutoff = 10;
1019         long _maxdot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
1020     #elif defined BT_USE_NEON
1021         const long scalar_cutoff = 4;
1022         extern long (*_maxdot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
1023     #endif
1024     if( array_count < scalar_cutoff )
1025 #endif
1026     {
1027         btScalar maxDot1 = -SIMD_INFINITY;
1028         int i = 0;
1029         int ptIndex = -1;
1030         for( i = 0; i < array_count; i++ )
1031         {
1032             btScalar dot = array[i].dot(*this);
1033 
1034             if( dot > maxDot1 )
1035             {
1036                 maxDot1 = dot;
1037                 ptIndex = i;
1038             }
1039         }
1040 
1041         dotOut = maxDot1;
1042         return ptIndex;
1043     }
1044 #if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
1045     return _maxdot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
1046 #endif
1047 }
1048 
minDot(const btVector3 * array,long array_count,btScalar & dotOut)1049 SIMD_FORCE_INLINE   long    btVector3::minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
1050 {
1051 #if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
1052     #if defined BT_USE_SSE
1053         const long scalar_cutoff = 10;
1054         long _mindot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
1055     #elif defined BT_USE_NEON
1056         const long scalar_cutoff = 4;
1057         extern long (*_mindot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
1058     #else
1059         #error unhandled arch!
1060     #endif
1061 
1062     if( array_count < scalar_cutoff )
1063 #endif
1064     {
1065         btScalar  minDot = SIMD_INFINITY;
1066         int i = 0;
1067         int ptIndex = -1;
1068 
1069         for( i = 0; i < array_count; i++ )
1070         {
1071             btScalar dot = array[i].dot(*this);
1072 
1073             if( dot < minDot )
1074             {
1075                 minDot = dot;
1076                 ptIndex = i;
1077             }
1078         }
1079 
1080         dotOut = minDot;
1081 
1082         return ptIndex;
1083     }
1084 #if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
1085     return _mindot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
1086 #endif//BT_USE_SIMD_VECTOR3
1087 }
1088 
1089 
1090 class btVector4 : public btVector3
1091 {
1092 public:
1093 
btVector4()1094 	SIMD_FORCE_INLINE btVector4() {}
1095 
1096 
btVector4(const btScalar & _x,const btScalar & _y,const btScalar & _z,const btScalar & _w)1097 	SIMD_FORCE_INLINE btVector4(const btScalar& _x, const btScalar& _y, const btScalar& _z,const btScalar& _w)
1098 		: btVector3(_x,_y,_z)
1099 	{
1100 		m_floats[3] = _w;
1101 	}
1102 
1103 #if (defined (BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined (BT_USE_NEON)
btVector4(const btSimdFloat4 vec)1104 	SIMD_FORCE_INLINE btVector4(const btSimdFloat4 vec)
1105 	{
1106 		mVec128 = vec;
1107 	}
1108 
btVector4(const btVector3 & rhs)1109 	SIMD_FORCE_INLINE btVector4(const btVector3& rhs)
1110 	{
1111 		mVec128 = rhs.mVec128;
1112 	}
1113 
1114 	SIMD_FORCE_INLINE btVector4&
1115 	operator=(const btVector4& v)
1116 	{
1117 		mVec128 = v.mVec128;
1118 		return *this;
1119 	}
1120 #endif // #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
1121 
absolute4()1122 	SIMD_FORCE_INLINE btVector4 absolute4() const
1123 	{
1124 #if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
1125 		return btVector4(_mm_and_ps(mVec128, btvAbsfMask));
1126 #elif defined(BT_USE_NEON)
1127 		return btVector4(vabsq_f32(mVec128));
1128 #else
1129 		return btVector4(
1130 			btFabs(m_floats[0]),
1131 			btFabs(m_floats[1]),
1132 			btFabs(m_floats[2]),
1133 			btFabs(m_floats[3]));
1134 #endif
1135 	}
1136 
1137 
getW()1138 	btScalar	getW() const { return m_floats[3];}
1139 
1140 
maxAxis4()1141 		SIMD_FORCE_INLINE int maxAxis4() const
1142 	{
1143 		int maxIndex = -1;
1144 		btScalar maxVal = btScalar(-BT_LARGE_FLOAT);
1145 		if (m_floats[0] > maxVal)
1146 		{
1147 			maxIndex = 0;
1148 			maxVal = m_floats[0];
1149 		}
1150 		if (m_floats[1] > maxVal)
1151 		{
1152 			maxIndex = 1;
1153 			maxVal = m_floats[1];
1154 		}
1155 		if (m_floats[2] > maxVal)
1156 		{
1157 			maxIndex = 2;
1158 			maxVal =m_floats[2];
1159 		}
1160 		if (m_floats[3] > maxVal)
1161 		{
1162 			maxIndex = 3;
1163 			maxVal = m_floats[3];
1164 		}
1165 
1166 		return maxIndex;
1167 	}
1168 
1169 
minAxis4()1170 	SIMD_FORCE_INLINE int minAxis4() const
1171 	{
1172 		int minIndex = -1;
1173 		btScalar minVal = btScalar(BT_LARGE_FLOAT);
1174 		if (m_floats[0] < minVal)
1175 		{
1176 			minIndex = 0;
1177 			minVal = m_floats[0];
1178 		}
1179 		if (m_floats[1] < minVal)
1180 		{
1181 			minIndex = 1;
1182 			minVal = m_floats[1];
1183 		}
1184 		if (m_floats[2] < minVal)
1185 		{
1186 			minIndex = 2;
1187 			minVal =m_floats[2];
1188 		}
1189 		if (m_floats[3] < minVal)
1190 		{
1191 			minIndex = 3;
1192 			minVal = m_floats[3];
1193 		}
1194 
1195 		return minIndex;
1196 	}
1197 
1198 
closestAxis4()1199 	SIMD_FORCE_INLINE int closestAxis4() const
1200 	{
1201 		return absolute4().maxAxis4();
1202 	}
1203 
1204 
1205 
1206 
1207   /**@brief Set x,y,z and zero w
1208    * @param x Value of x
1209    * @param y Value of y
1210    * @param z Value of z
1211    */
1212 
1213 
1214 /*		void getValue(btScalar *m) const
1215 		{
1216 			m[0] = m_floats[0];
1217 			m[1] = m_floats[1];
1218 			m[2] =m_floats[2];
1219 		}
1220 */
1221 /**@brief Set the values
1222    * @param x Value of x
1223    * @param y Value of y
1224    * @param z Value of z
1225    * @param w Value of w
1226    */
setValue(const btScalar & _x,const btScalar & _y,const btScalar & _z,const btScalar & _w)1227 		SIMD_FORCE_INLINE void	setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z,const btScalar& _w)
1228 		{
1229 			m_floats[0]=_x;
1230 			m_floats[1]=_y;
1231 			m_floats[2]=_z;
1232 			m_floats[3]=_w;
1233 		}
1234 
1235 
1236 };
1237 
1238 
1239 ///btSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
btSwapScalarEndian(const btScalar & sourceVal,btScalar & destVal)1240 SIMD_FORCE_INLINE void	btSwapScalarEndian(const btScalar& sourceVal, btScalar& destVal)
1241 {
1242 	#ifdef BT_USE_DOUBLE_PRECISION
1243 	unsigned char* dest = (unsigned char*) &destVal;
1244 	unsigned char* src  = (unsigned char*) &sourceVal;
1245 	dest[0] = src[7];
1246     dest[1] = src[6];
1247     dest[2] = src[5];
1248     dest[3] = src[4];
1249     dest[4] = src[3];
1250     dest[5] = src[2];
1251     dest[6] = src[1];
1252     dest[7] = src[0];
1253 #else
1254 	unsigned char* dest = (unsigned char*) &destVal;
1255 	unsigned char* src  = (unsigned char*) &sourceVal;
1256 	dest[0] = src[3];
1257     dest[1] = src[2];
1258     dest[2] = src[1];
1259     dest[3] = src[0];
1260 #endif //BT_USE_DOUBLE_PRECISION
1261 }
1262 ///btSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
btSwapVector3Endian(const btVector3 & sourceVec,btVector3 & destVec)1263 SIMD_FORCE_INLINE void	btSwapVector3Endian(const btVector3& sourceVec, btVector3& destVec)
1264 {
1265 	for (int i=0;i<4;i++)
1266 	{
1267 		btSwapScalarEndian(sourceVec[i],destVec[i]);
1268 	}
1269 
1270 }
1271 
1272 ///btUnSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
btUnSwapVector3Endian(btVector3 & vector)1273 SIMD_FORCE_INLINE void	btUnSwapVector3Endian(btVector3& vector)
1274 {
1275 
1276 	btVector3	swappedVec;
1277 	for (int i=0;i<4;i++)
1278 	{
1279 		btSwapScalarEndian(vector[i],swappedVec[i]);
1280 	}
1281 	vector = swappedVec;
1282 }
1283 
1284 template <class T>
btPlaneSpace1(const T & n,T & p,T & q)1285 SIMD_FORCE_INLINE void btPlaneSpace1 (const T& n, T& p, T& q)
1286 {
1287   if (btFabs(n[2]) > SIMDSQRT12) {
1288     // choose p in y-z plane
1289     btScalar a = n[1]*n[1] + n[2]*n[2];
1290     btScalar k = btRecipSqrt (a);
1291     p[0] = 0;
1292 	p[1] = -n[2]*k;
1293 	p[2] = n[1]*k;
1294     // set q = n x p
1295     q[0] = a*k;
1296 	q[1] = -n[0]*p[2];
1297 	q[2] = n[0]*p[1];
1298   }
1299   else {
1300     // choose p in x-y plane
1301     btScalar a = n[0]*n[0] + n[1]*n[1];
1302     btScalar k = btRecipSqrt (a);
1303     p[0] = -n[1]*k;
1304 	p[1] = n[0]*k;
1305 	p[2] = 0;
1306     // set q = n x p
1307     q[0] = -n[2]*p[1];
1308 	q[1] = n[2]*p[0];
1309 	q[2] = a*k;
1310   }
1311 }
1312 
1313 
1314 struct	btVector3FloatData
1315 {
1316 	float	m_floats[4];
1317 };
1318 
1319 struct	btVector3DoubleData
1320 {
1321 	double	m_floats[4];
1322 
1323 };
1324 
serializeFloat(struct btVector3FloatData & dataOut)1325 SIMD_FORCE_INLINE	void	btVector3::serializeFloat(struct	btVector3FloatData& dataOut) const
1326 {
1327 	///could also do a memcpy, check if it is worth it
1328 	for (int i=0;i<4;i++)
1329 		dataOut.m_floats[i] = float(m_floats[i]);
1330 }
1331 
deSerializeFloat(const struct btVector3FloatData & dataIn)1332 SIMD_FORCE_INLINE void	btVector3::deSerializeFloat(const struct	btVector3FloatData& dataIn)
1333 {
1334 	for (int i=0;i<4;i++)
1335 		m_floats[i] = btScalar(dataIn.m_floats[i]);
1336 }
1337 
1338 
serializeDouble(struct btVector3DoubleData & dataOut)1339 SIMD_FORCE_INLINE	void	btVector3::serializeDouble(struct	btVector3DoubleData& dataOut) const
1340 {
1341 	///could also do a memcpy, check if it is worth it
1342 	for (int i=0;i<4;i++)
1343 		dataOut.m_floats[i] = double(m_floats[i]);
1344 }
1345 
deSerializeDouble(const struct btVector3DoubleData & dataIn)1346 SIMD_FORCE_INLINE void	btVector3::deSerializeDouble(const struct	btVector3DoubleData& dataIn)
1347 {
1348 	for (int i=0;i<4;i++)
1349 		m_floats[i] = btScalar(dataIn.m_floats[i]);
1350 }
1351 
1352 
serialize(struct btVector3Data & dataOut)1353 SIMD_FORCE_INLINE	void	btVector3::serialize(struct	btVector3Data& dataOut) const
1354 {
1355 	///could also do a memcpy, check if it is worth it
1356 	for (int i=0;i<4;i++)
1357 		dataOut.m_floats[i] = m_floats[i];
1358 }
1359 
deSerialize(const struct btVector3Data & dataIn)1360 SIMD_FORCE_INLINE void	btVector3::deSerialize(const struct	btVector3Data& dataIn)
1361 {
1362 	for (int i=0;i<4;i++)
1363 		m_floats[i] = dataIn.m_floats[i];
1364 }
1365 
1366 #endif //BT_VECTOR3_H
1367