1 /*
2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
3 
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9 
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 
15 
16 
17 #ifndef BT_SIMD__QUATERNION_H_
18 #define BT_SIMD__QUATERNION_H_
19 
20 
21 #include "btVector3.h"
22 #include "btQuadWord.h"
23 
24 
25 #ifdef BT_USE_DOUBLE_PRECISION
26 #define btQuaternionData btQuaternionDoubleData
27 #define btQuaternionDataName "btQuaternionDoubleData"
28 #else
29 #define btQuaternionData btQuaternionFloatData
30 #define btQuaternionDataName "btQuaternionFloatData"
31 #endif //BT_USE_DOUBLE_PRECISION
32 
33 
34 
35 #ifdef BT_USE_SSE
36 
37 //const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
38 #define vOnes (_mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f))
39 
40 #endif
41 
42 #if defined(BT_USE_SSE)
43 
44 #define vQInv (_mm_set_ps(+0.0f, -0.0f, -0.0f, -0.0f))
45 #define vPPPM (_mm_set_ps(-0.0f, +0.0f, +0.0f, +0.0f))
46 
47 #elif defined(BT_USE_NEON)
48 
49 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
50 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
51 
52 #endif
53 
54 /**@brief The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform. */
55 class btQuaternion : public btQuadWord {
56 public:
57   /**@brief No initialization constructor */
btQuaternion()58 	btQuaternion() {}
59 
60 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))|| defined(BT_USE_NEON)
61 	// Set Vector
btQuaternion(const btSimdFloat4 vec)62 	SIMD_FORCE_INLINE btQuaternion(const btSimdFloat4 vec)
63 	{
64 		mVec128 = vec;
65 	}
66 
67 	// Copy constructor
btQuaternion(const btQuaternion & rhs)68 	SIMD_FORCE_INLINE btQuaternion(const btQuaternion& rhs)
69 	{
70 		mVec128 = rhs.mVec128;
71 	}
72 
73 	// Assignment Operator
74 	SIMD_FORCE_INLINE btQuaternion&
75 	operator=(const btQuaternion& v)
76 	{
77 		mVec128 = v.mVec128;
78 
79 		return *this;
80 	}
81 
82 #endif
83 
84 	//		template <typename btScalar>
85 	//		explicit Quaternion(const btScalar *v) : Tuple4<btScalar>(v) {}
86   /**@brief Constructor from scalars */
btQuaternion(const btScalar & _x,const btScalar & _y,const btScalar & _z,const btScalar & _w)87 	btQuaternion(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w)
88 		: btQuadWord(_x, _y, _z, _w)
89 	{}
90   /**@brief Axis angle Constructor
91    * @param axis The axis which the rotation is around
92    * @param angle The magnitude of the rotation around the angle (Radians) */
btQuaternion(const btVector3 & _axis,const btScalar & _angle)93 	btQuaternion(const btVector3& _axis, const btScalar& _angle)
94 	{
95 		setRotation(_axis, _angle);
96 	}
97   /**@brief Constructor from Euler angles
98    * @param yaw Angle around Y unless BT_EULER_DEFAULT_ZYX defined then Z
99    * @param pitch Angle around X unless BT_EULER_DEFAULT_ZYX defined then Y
100    * @param roll Angle around Z unless BT_EULER_DEFAULT_ZYX defined then X */
btQuaternion(const btScalar & yaw,const btScalar & pitch,const btScalar & roll)101 	btQuaternion(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
102 	{
103 #ifndef BT_EULER_DEFAULT_ZYX
104 		setEuler(yaw, pitch, roll);
105 #else
106 		setEulerZYX(yaw, pitch, roll);
107 #endif
108 	}
109   /**@brief Set the rotation using axis angle notation
110    * @param axis The axis around which to rotate
111    * @param angle The magnitude of the rotation in Radians */
setRotation(const btVector3 & axis,const btScalar & _angle)112 	void setRotation(const btVector3& axis, const btScalar& _angle)
113 	{
114 		btScalar d = axis.length();
115 		btAssert(d != btScalar(0.0));
116 		btScalar s = btSin(_angle * btScalar(0.5)) / d;
117 		setValue(axis.x() * s, axis.y() * s, axis.z() * s,
118 			btCos(_angle * btScalar(0.5)));
119 	}
120   /**@brief Set the quaternion using Euler angles
121    * @param yaw Angle around Y
122    * @param pitch Angle around X
123    * @param roll Angle around Z */
setEuler(const btScalar & yaw,const btScalar & pitch,const btScalar & roll)124 	void setEuler(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
125 	{
126 		btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
127 		btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
128 		btScalar halfRoll = btScalar(roll) * btScalar(0.5);
129 		btScalar cosYaw = btCos(halfYaw);
130 		btScalar sinYaw = btSin(halfYaw);
131 		btScalar cosPitch = btCos(halfPitch);
132 		btScalar sinPitch = btSin(halfPitch);
133 		btScalar cosRoll = btCos(halfRoll);
134 		btScalar sinRoll = btSin(halfRoll);
135 		setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
136 			cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
137 			sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
138 			cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
139 	}
140   /**@brief Set the quaternion using euler angles
141    * @param yaw Angle around Z
142    * @param pitch Angle around Y
143    * @param roll Angle around X */
setEulerZYX(const btScalar & yaw,const btScalar & pitch,const btScalar & roll)144 	void setEulerZYX(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
145 	{
146 		btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
147 		btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
148 		btScalar halfRoll = btScalar(roll) * btScalar(0.5);
149 		btScalar cosYaw = btCos(halfYaw);
150 		btScalar sinYaw = btSin(halfYaw);
151 		btScalar cosPitch = btCos(halfPitch);
152 		btScalar sinPitch = btSin(halfPitch);
153 		btScalar cosRoll = btCos(halfRoll);
154 		btScalar sinRoll = btSin(halfRoll);
155 		setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
156                          cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
157                          cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
158                          cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
159 	}
160   /**@brief Add two quaternions
161    * @param q The quaternion to add to this one */
162 	SIMD_FORCE_INLINE	btQuaternion& operator+=(const btQuaternion& q)
163 	{
164 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
165 		mVec128 = _mm_add_ps(mVec128, q.mVec128);
166 #elif defined(BT_USE_NEON)
167 		mVec128 = vaddq_f32(mVec128, q.mVec128);
168 #else
169 		m_floats[0] += q.x();
170         m_floats[1] += q.y();
171         m_floats[2] += q.z();
172         m_floats[3] += q.m_floats[3];
173 #endif
174 		return *this;
175 	}
176 
177   /**@brief Subtract out a quaternion
178    * @param q The quaternion to subtract from this one */
179 	btQuaternion& operator-=(const btQuaternion& q)
180 	{
181 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
182 		mVec128 = _mm_sub_ps(mVec128, q.mVec128);
183 #elif defined(BT_USE_NEON)
184 		mVec128 = vsubq_f32(mVec128, q.mVec128);
185 #else
186 		m_floats[0] -= q.x();
187         m_floats[1] -= q.y();
188         m_floats[2] -= q.z();
189         m_floats[3] -= q.m_floats[3];
190 #endif
191         return *this;
192 	}
193 
194   /**@brief Scale this quaternion
195    * @param s The scalar to scale by */
196 	btQuaternion& operator*=(const btScalar& s)
197 	{
198 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
199 		__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
200 		vs = bt_pshufd_ps(vs, 0);	//	(S S S S)
201 		mVec128 = _mm_mul_ps(mVec128, vs);
202 #elif defined(BT_USE_NEON)
203 		mVec128 = vmulq_n_f32(mVec128, s);
204 #else
205 		m_floats[0] *= s;
206         m_floats[1] *= s;
207         m_floats[2] *= s;
208         m_floats[3] *= s;
209 #endif
210 		return *this;
211 	}
212 
213   /**@brief Multiply this quaternion by q on the right
214    * @param q The other quaternion
215    * Equivilant to this = this * q */
216 	btQuaternion& operator*=(const btQuaternion& q)
217 	{
218 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
219 		__m128 vQ2 = q.get128();
220 
221 		__m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0,1,2,0));
222 		__m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
223 
224 		A1 = A1 * B1;
225 
226 		__m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1,2,0,1));
227 		__m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
228 
229 		A2 = A2 * B2;
230 
231 		B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2,0,1,2));
232 		B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
233 
234 		B1 = B1 * B2;	//	A3 *= B3
235 
236 		mVec128 = bt_splat_ps(mVec128, 3);	//	A0
237 		mVec128 = mVec128 * vQ2;	//	A0 * B0
238 
239 		A1 = A1 + A2;	//	AB12
240 		mVec128 = mVec128 - B1;	//	AB03 = AB0 - AB3
241 		A1 = _mm_xor_ps(A1, vPPPM);	//	change sign of the last element
242 		mVec128 = mVec128+ A1;	//	AB03 + AB12
243 
244 #elif defined(BT_USE_NEON)
245 
246         float32x4_t vQ1 = mVec128;
247         float32x4_t vQ2 = q.get128();
248         float32x4_t A0, A1, B1, A2, B2, A3, B3;
249         float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
250 
251         {
252         float32x2x2_t tmp;
253         tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
254         vQ1zx = tmp.val[0];
255 
256         tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
257         vQ2zx = tmp.val[0];
258         }
259         vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
260 
261         vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
262 
263         vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
264         vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
265 
266         A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x
267         B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X
268 
269         A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
270         B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
271 
272         A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
273         B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
274 
275         A1 = vmulq_f32(A1, B1);
276         A2 = vmulq_f32(A2, B2);
277         A3 = vmulq_f32(A3, B3);	//	A3 *= B3
278         A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); //	A0 * B0
279 
280         A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
281         A0 = vsubq_f32(A0, A3);	//	AB03 = AB0 - AB3
282 
283         //	change the sign of the last element
284         A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
285         A0 = vaddq_f32(A0, A1);	//	AB03 + AB12
286 
287         mVec128 = A0;
288 #else
289 		setValue(
290             m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(),
291 			m_floats[3] * q.y() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.x() - m_floats[0] * q.z(),
292 			m_floats[3] * q.z() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.y() - m_floats[1] * q.x(),
293 			m_floats[3] * q.m_floats[3] - m_floats[0] * q.x() - m_floats[1] * q.y() - m_floats[2] * q.z());
294 #endif
295 		return *this;
296 	}
297   /**@brief Return the dot product between this quaternion and another
298    * @param q The other quaternion */
dot(const btQuaternion & q)299 	btScalar dot(const btQuaternion& q) const
300 	{
301 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
302 		__m128	vd;
303 
304 		vd = _mm_mul_ps(mVec128, q.mVec128);
305 
306         __m128 t = _mm_movehl_ps(vd, vd);
307 		vd = _mm_add_ps(vd, t);
308 		t = _mm_shuffle_ps(vd, vd, 0x55);
309 		vd = _mm_add_ss(vd, t);
310 
311         return _mm_cvtss_f32(vd);
312 #elif defined(BT_USE_NEON)
313 		float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
314 		float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
315 		x = vpadd_f32(x, x);
316 		return vget_lane_f32(x, 0);
317 #else
318 		return  m_floats[0] * q.x() +
319                 m_floats[1] * q.y() +
320                 m_floats[2] * q.z() +
321                 m_floats[3] * q.m_floats[3];
322 #endif
323 	}
324 
325   /**@brief Return the length squared of the quaternion */
length2()326 	btScalar length2() const
327 	{
328 		return dot(*this);
329 	}
330 
331   /**@brief Return the length of the quaternion */
length()332 	btScalar length() const
333 	{
334 		return btSqrt(length2());
335 	}
336 
337   /**@brief Normalize the quaternion
338    * Such that x^2 + y^2 + z^2 +w^2 = 1 */
normalize()339 	btQuaternion& normalize()
340 	{
341 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
342 		__m128	vd;
343 
344 		vd = _mm_mul_ps(mVec128, mVec128);
345 
346         __m128 t = _mm_movehl_ps(vd, vd);
347 		vd = _mm_add_ps(vd, t);
348 		t = _mm_shuffle_ps(vd, vd, 0x55);
349 		vd = _mm_add_ss(vd, t);
350 
351 		vd = _mm_sqrt_ss(vd);
352 		vd = _mm_div_ss(vOnes, vd);
353         vd = bt_pshufd_ps(vd, 0); // splat
354 		mVec128 = _mm_mul_ps(mVec128, vd);
355 
356 		return *this;
357 #else
358 		return *this /= length();
359 #endif
360 	}
361 
362   /**@brief Return a scaled version of this quaternion
363    * @param s The scale factor */
364 	SIMD_FORCE_INLINE btQuaternion
365 	operator*(const btScalar& s) const
366 	{
367 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
368 		__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
369 		vs = bt_pshufd_ps(vs, 0x00);	//	(S S S S)
370 
371 		return btQuaternion(_mm_mul_ps(mVec128, vs));
372 #elif defined(BT_USE_NEON)
373 		return btQuaternion(vmulq_n_f32(mVec128, s));
374 #else
375 		return btQuaternion(x() * s, y() * s, z() * s, m_floats[3] * s);
376 #endif
377 	}
378 
379   /**@brief Return an inversely scaled versionof this quaternion
380    * @param s The inverse scale factor */
381 	btQuaternion operator/(const btScalar& s) const
382 	{
383 		btAssert(s != btScalar(0.0));
384 		return *this * (btScalar(1.0) / s);
385 	}
386 
387   /**@brief Inversely scale this quaternion
388    * @param s The scale factor */
389 	btQuaternion& operator/=(const btScalar& s)
390 	{
391 		btAssert(s != btScalar(0.0));
392 		return *this *= btScalar(1.0) / s;
393 	}
394 
395   /**@brief Return a normalized version of this quaternion */
normalized()396 	btQuaternion normalized() const
397 	{
398 		return *this / length();
399 	}
400 	/**@brief Return the ***half*** angle between this quaternion and the other
401    * @param q The other quaternion */
angle(const btQuaternion & q)402 	btScalar angle(const btQuaternion& q) const
403 	{
404 		btScalar s = btSqrt(length2() * q.length2());
405 		btAssert(s != btScalar(0.0));
406 		return btAcos(dot(q) / s);
407 	}
408 
409 	/**@brief Return the angle between this quaternion and the other along the shortest path
410 	* @param q The other quaternion */
angleShortestPath(const btQuaternion & q)411 	btScalar angleShortestPath(const btQuaternion& q) const
412 	{
413 		btScalar s = btSqrt(length2() * q.length2());
414 		btAssert(s != btScalar(0.0));
415 		if (dot(q) < 0) // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
416 			return btAcos(dot(-q) / s) * btScalar(2.0);
417 		else
418 			return btAcos(dot(q) / s) * btScalar(2.0);
419 	}
420 
421 	/**@brief Return the angle [0, 2Pi] of rotation represented by this quaternion */
getAngle()422 	btScalar getAngle() const
423 	{
424 		btScalar s = btScalar(2.) * btAcos(m_floats[3]);
425 		return s;
426 	}
427 
428 	/**@brief Return the angle [0, Pi] of rotation represented by this quaternion along the shortest path */
getAngleShortestPath()429 	btScalar getAngleShortestPath() const
430 	{
431 		btScalar s;
432 		if (m_floats[3] >= 0)
433 			s = btScalar(2.) * btAcos(m_floats[3]);
434 		else
435 			s = btScalar(2.) * btAcos(-m_floats[3]);
436 		return s;
437 	}
438 
439 
440 	/**@brief Return the axis of the rotation represented by this quaternion */
getAxis()441 	btVector3 getAxis() const
442 	{
443 		btScalar s_squared = 1.f-m_floats[3]*m_floats[3];
444 
445 		if (s_squared < btScalar(10.) * SIMD_EPSILON) //Check for divide by zero
446 			return btVector3(1.0, 0.0, 0.0);  // Arbitrary
447 		btScalar s = 1.f/btSqrt(s_squared);
448 		return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
449 	}
450 
451 	/**@brief Return the inverse of this quaternion */
inverse()452 	btQuaternion inverse() const
453 	{
454 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
455 		return btQuaternion(_mm_xor_ps(mVec128, vQInv));
456 #elif defined(BT_USE_NEON)
457         return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
458 #else
459 		return btQuaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
460 #endif
461 	}
462 
463   /**@brief Return the sum of this quaternion and the other
464    * @param q2 The other quaternion */
465 	SIMD_FORCE_INLINE btQuaternion
466 	operator+(const btQuaternion& q2) const
467 	{
468 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
469 		return btQuaternion(_mm_add_ps(mVec128, q2.mVec128));
470 #elif defined(BT_USE_NEON)
471         return btQuaternion(vaddq_f32(mVec128, q2.mVec128));
472 #else
473 		const btQuaternion& q1 = *this;
474 		return btQuaternion(q1.x() + q2.x(), q1.y() + q2.y(), q1.z() + q2.z(), q1.m_floats[3] + q2.m_floats[3]);
475 #endif
476 	}
477 
478   /**@brief Return the difference between this quaternion and the other
479    * @param q2 The other quaternion */
480 	SIMD_FORCE_INLINE btQuaternion
481 	operator-(const btQuaternion& q2) const
482 	{
483 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
484 		return btQuaternion(_mm_sub_ps(mVec128, q2.mVec128));
485 #elif defined(BT_USE_NEON)
486         return btQuaternion(vsubq_f32(mVec128, q2.mVec128));
487 #else
488 		const btQuaternion& q1 = *this;
489 		return btQuaternion(q1.x() - q2.x(), q1.y() - q2.y(), q1.z() - q2.z(), q1.m_floats[3] - q2.m_floats[3]);
490 #endif
491 	}
492 
493   /**@brief Return the negative of this quaternion
494    * This simply negates each element */
495 	SIMD_FORCE_INLINE btQuaternion operator-() const
496 	{
497 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
498 		return btQuaternion(_mm_xor_ps(mVec128, btvMzeroMask));
499 #elif defined(BT_USE_NEON)
500 		return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask) );
501 #else
502 		const btQuaternion& q2 = *this;
503 		return btQuaternion( - q2.x(), - q2.y(),  - q2.z(),  - q2.m_floats[3]);
504 #endif
505 	}
506   /**@todo document this and it's use */
farthest(const btQuaternion & qd)507 	SIMD_FORCE_INLINE btQuaternion farthest( const btQuaternion& qd) const
508 	{
509 		btQuaternion diff,sum;
510 		diff = *this - qd;
511 		sum = *this + qd;
512 		if( diff.dot(diff) > sum.dot(sum) )
513 			return qd;
514 		return (-qd);
515 	}
516 
517 	/**@todo document this and it's use */
nearest(const btQuaternion & qd)518 	SIMD_FORCE_INLINE btQuaternion nearest( const btQuaternion& qd) const
519 	{
520 		btQuaternion diff,sum;
521 		diff = *this - qd;
522 		sum = *this + qd;
523 		if( diff.dot(diff) < sum.dot(sum) )
524 			return qd;
525 		return (-qd);
526 	}
527 
528 
529   /**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
530    * @param q The other quaternion to interpolate with
531    * @param t The ratio between this and q to interpolate.  If t = 0 the result is this, if t=1 the result is q.
532    * Slerp interpolates assuming constant velocity.  */
slerp(const btQuaternion & q,const btScalar & t)533 	btQuaternion slerp(const btQuaternion& q, const btScalar& t) const
534 	{
535 
536 		const btScalar magnitude = btSqrt(length2() * q.length2());
537 		btAssert(magnitude > btScalar(0));
538 
539 		const btScalar product = dot(q) / magnitude;
540 		const btScalar absproduct = btFabs(product);
541 
542 		if(absproduct < btScalar(1.0 - SIMD_EPSILON))
543 		{
544 			// Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
545 			const btScalar theta = btAcos(absproduct);
546 			const btScalar d = btSin(theta);
547 			btAssert(d > btScalar(0));
548 
549 			const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);
550 			const btScalar s0 = btSin((btScalar(1.0) - t) * theta) / d;
551 			const btScalar s1 = btSin(sign * t * theta) / d;
552 
553 			return btQuaternion(
554 				(m_floats[0] * s0 + q.x() * s1),
555 				(m_floats[1] * s0 + q.y() * s1),
556 				(m_floats[2] * s0 + q.z() * s1),
557 				(m_floats[3] * s0 + q.w() * s1));
558 		}
559 		else
560 		{
561 			return *this;
562 		}
563 	}
564 
getIdentity()565 	static const btQuaternion&	getIdentity()
566 	{
567 		static const btQuaternion identityQuat(btScalar(0.),btScalar(0.),btScalar(0.),btScalar(1.));
568 		return identityQuat;
569 	}
570 
getW()571 	SIMD_FORCE_INLINE const btScalar& getW() const { return m_floats[3]; }
572 
573 	SIMD_FORCE_INLINE	void	serialize(struct	btQuaternionData& dataOut) const;
574 
575 	SIMD_FORCE_INLINE	void	deSerialize(const struct	btQuaternionData& dataIn);
576 
577 	SIMD_FORCE_INLINE	void	serializeFloat(struct	btQuaternionFloatData& dataOut) const;
578 
579 	SIMD_FORCE_INLINE	void	deSerializeFloat(const struct	btQuaternionFloatData& dataIn);
580 
581 	SIMD_FORCE_INLINE	void	serializeDouble(struct	btQuaternionDoubleData& dataOut) const;
582 
583 	SIMD_FORCE_INLINE	void	deSerializeDouble(const struct	btQuaternionDoubleData& dataIn);
584 
585 };
586 
587 
588 
589 
590 
591 /**@brief Return the product of two quaternions */
592 SIMD_FORCE_INLINE btQuaternion
593 operator*(const btQuaternion& q1, const btQuaternion& q2)
594 {
595 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
596 	__m128 vQ1 = q1.get128();
597 	__m128 vQ2 = q2.get128();
598 	__m128 A0, A1, B1, A2, B2;
599 
600 	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y  z x     //      vtrn
601 	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W  W X     // vdup vext
602 
603 	A1 = A1 * B1;
604 
605 	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1)); // Y Z  X Y     // vext
606 	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1)); // z x  Y Y     // vtrn vdup
607 
608 	A2 = A2 * B2;
609 
610 	B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2)); // z x Y Z      // vtrn vext
611 	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2)); // Y Z x z      // vext vtrn
612 
613 	B1 = B1 * B2;	//	A3 *= B3
614 
615 	A0 = bt_splat_ps(vQ1, 3);	//	A0
616 	A0 = A0 * vQ2;	//	A0 * B0
617 
618 	A1 = A1 + A2;	//	AB12
619 	A0 =  A0 - B1;	//	AB03 = AB0 - AB3
620 
621     A1 = _mm_xor_ps(A1, vPPPM);	//	change sign of the last element
622 	A0 = A0 + A1;	//	AB03 + AB12
623 
624 	return btQuaternion(A0);
625 
626 #elif defined(BT_USE_NEON)
627 
628 	float32x4_t vQ1 = q1.get128();
629 	float32x4_t vQ2 = q2.get128();
630 	float32x4_t A0, A1, B1, A2, B2, A3, B3;
631     float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
632 
633     {
634     float32x2x2_t tmp;
635     tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
636     vQ1zx = tmp.val[0];
637 
638     tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
639     vQ2zx = tmp.val[0];
640     }
641     vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
642 
643     vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
644 
645     vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
646     vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
647 
648     A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x
649     B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X
650 
651 	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
652     B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
653 
654     A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
655     B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
656 
657 	A1 = vmulq_f32(A1, B1);
658 	A2 = vmulq_f32(A2, B2);
659 	A3 = vmulq_f32(A3, B3);	//	A3 *= B3
660 	A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); //	A0 * B0
661 
662 	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
663 	A0 = vsubq_f32(A0, A3);	//	AB03 = AB0 - AB3
664 
665     //	change the sign of the last element
666     A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
667 	A0 = vaddq_f32(A0, A1);	//	AB03 + AB12
668 
669 	return btQuaternion(A0);
670 
671 #else
672 	return btQuaternion(
673         q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
674 		q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
675 		q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
676 		q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z());
677 #endif
678 }
679 
680 SIMD_FORCE_INLINE btQuaternion
681 operator*(const btQuaternion& q, const btVector3& w)
682 {
683 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
684 	__m128 vQ1 = q.get128();
685 	__m128 vQ2 = w.get128();
686 	__m128 A1, B1, A2, B2, A3, B3;
687 
688 	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3,3,3,0));
689 	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0,1,2,0));
690 
691 	A1 = A1 * B1;
692 
693 	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
694 	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
695 
696 	A2 = A2 * B2;
697 
698 	A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
699 	B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
700 
701 	A3 = A3 * B3;	//	A3 *= B3
702 
703 	A1 = A1 + A2;	//	AB12
704 	A1 = _mm_xor_ps(A1, vPPPM);	//	change sign of the last element
705     A1 = A1 - A3;	//	AB123 = AB12 - AB3
706 
707 	return btQuaternion(A1);
708 
709 #elif defined(BT_USE_NEON)
710 
711 	float32x4_t vQ1 = q.get128();
712 	float32x4_t vQ2 = w.get128();
713 	float32x4_t A1, B1, A2, B2, A3, B3;
714     float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
715 
716     vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
717     {
718     float32x2x2_t tmp;
719 
720     tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
721     vQ2zx = tmp.val[0];
722 
723     tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
724     vQ1zx = tmp.val[0];
725     }
726 
727     vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
728 
729     vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
730     vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
731 
732     A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W  W X
733     B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx);                    // X Y  z x
734 
735 	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
736     B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
737 
738     A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
739     B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
740 
741 	A1 = vmulq_f32(A1, B1);
742 	A2 = vmulq_f32(A2, B2);
743 	A3 = vmulq_f32(A3, B3);	//	A3 *= B3
744 
745 	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
746 
747     //	change the sign of the last element
748     A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
749 
750     A1 = vsubq_f32(A1, A3);	//	AB123 = AB12 - AB3
751 
752 	return btQuaternion(A1);
753 
754 #else
755 	return btQuaternion(
756          q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
757 		 q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
758 		 q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
759 		-q.x() * w.x() - q.y() * w.y() - q.z() * w.z());
760 #endif
761 }
762 
763 SIMD_FORCE_INLINE btQuaternion
764 operator*(const btVector3& w, const btQuaternion& q)
765 {
766 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
767 	__m128 vQ1 = w.get128();
768 	__m128 vQ2 = q.get128();
769 	__m128 A1, B1, A2, B2, A3, B3;
770 
771 	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0));  // X Y  z x
772 	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));  // W W  W X
773 
774 	A1 = A1 * B1;
775 
776 	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
777 	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
778 
779 	A2 = A2 *B2;
780 
781 	A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
782 	B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
783 
784 	A3 = A3 * B3;	//	A3 *= B3
785 
786 	A1 = A1 + A2;	//	AB12
787 	A1 = _mm_xor_ps(A1, vPPPM);	//	change sign of the last element
788 	A1 = A1 - A3;	//	AB123 = AB12 - AB3
789 
790 	return btQuaternion(A1);
791 
792 #elif defined(BT_USE_NEON)
793 
794 	float32x4_t vQ1 = w.get128();
795 	float32x4_t vQ2 = q.get128();
796 	float32x4_t  A1, B1, A2, B2, A3, B3;
797     float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
798 
799     {
800     float32x2x2_t tmp;
801 
802     tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
803     vQ1zx = tmp.val[0];
804 
805     tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
806     vQ2zx = tmp.val[0];
807     }
808     vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
809 
810     vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
811 
812     vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
813     vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
814 
815     A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x
816     B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X
817 
818 	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
819     B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
820 
821     A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
822     B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
823 
824 	A1 = vmulq_f32(A1, B1);
825 	A2 = vmulq_f32(A2, B2);
826 	A3 = vmulq_f32(A3, B3);	//	A3 *= B3
827 
828 	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
829 
830     //	change the sign of the last element
831     A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
832 
833     A1 = vsubq_f32(A1, A3);	//	AB123 = AB12 - AB3
834 
835 	return btQuaternion(A1);
836 
837 #else
838 	return btQuaternion(
839         +w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
840 		+w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
841 		+w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
842 		-w.x() * q.x() - w.y() * q.y() - w.z() * q.z());
843 #endif
844 }
845 
846 /**@brief Calculate the dot product between two quaternions */
847 SIMD_FORCE_INLINE btScalar
dot(const btQuaternion & q1,const btQuaternion & q2)848 dot(const btQuaternion& q1, const btQuaternion& q2)
849 {
850 	return q1.dot(q2);
851 }
852 
853 
854 /**@brief Return the length of a quaternion */
855 SIMD_FORCE_INLINE btScalar
length(const btQuaternion & q)856 length(const btQuaternion& q)
857 {
858 	return q.length();
859 }
860 
861 /**@brief Return the angle between two quaternions*/
862 SIMD_FORCE_INLINE btScalar
btAngle(const btQuaternion & q1,const btQuaternion & q2)863 btAngle(const btQuaternion& q1, const btQuaternion& q2)
864 {
865 	return q1.angle(q2);
866 }
867 
868 /**@brief Return the inverse of a quaternion*/
869 SIMD_FORCE_INLINE btQuaternion
inverse(const btQuaternion & q)870 inverse(const btQuaternion& q)
871 {
872 	return q.inverse();
873 }
874 
875 /**@brief Return the result of spherical linear interpolation betwen two quaternions
876  * @param q1 The first quaternion
877  * @param q2 The second quaternion
878  * @param t The ration between q1 and q2.  t = 0 return q1, t=1 returns q2
879  * Slerp assumes constant velocity between positions. */
880 SIMD_FORCE_INLINE btQuaternion
slerp(const btQuaternion & q1,const btQuaternion & q2,const btScalar & t)881 slerp(const btQuaternion& q1, const btQuaternion& q2, const btScalar& t)
882 {
883 	return q1.slerp(q2, t);
884 }
885 
886 SIMD_FORCE_INLINE btVector3
quatRotate(const btQuaternion & rotation,const btVector3 & v)887 quatRotate(const btQuaternion& rotation, const btVector3& v)
888 {
889 	btQuaternion q = rotation * v;
890 	q *= rotation.inverse();
891 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
892 	return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
893 #elif defined(BT_USE_NEON)
894     return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
895 #else
896 	return btVector3(q.getX(),q.getY(),q.getZ());
897 #endif
898 }
899 
900 SIMD_FORCE_INLINE btQuaternion
shortestArcQuat(const btVector3 & v0,const btVector3 & v1)901 shortestArcQuat(const btVector3& v0, const btVector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
902 {
903 	btVector3 c = v0.cross(v1);
904 	btScalar  d = v0.dot(v1);
905 
906 	if (d < -1.0 + SIMD_EPSILON)
907 	{
908 		btVector3 n,unused;
909 		btPlaneSpace1(v0,n,unused);
910 		return btQuaternion(n.x(),n.y(),n.z(),0.0f); // just pick any vector that is orthogonal to v0
911 	}
912 
913 	btScalar  s = btSqrt((1.0f + d) * 2.0f);
914 	btScalar rs = 1.0f / s;
915 
916 	return btQuaternion(c.getX()*rs,c.getY()*rs,c.getZ()*rs,s * 0.5f);
917 }
918 
919 SIMD_FORCE_INLINE btQuaternion
shortestArcQuatNormalize2(btVector3 & v0,btVector3 & v1)920 shortestArcQuatNormalize2(btVector3& v0,btVector3& v1)
921 {
922 	v0.normalize();
923 	v1.normalize();
924 	return shortestArcQuat(v0,v1);
925 }
926 
927 
928 
929 
930 struct	btQuaternionFloatData
931 {
932 	float	m_floats[4];
933 };
934 
935 struct	btQuaternionDoubleData
936 {
937 	double	m_floats[4];
938 
939 };
940 
serializeFloat(struct btQuaternionFloatData & dataOut)941 SIMD_FORCE_INLINE	void	btQuaternion::serializeFloat(struct	btQuaternionFloatData& dataOut) const
942 {
943 	///could also do a memcpy, check if it is worth it
944 	for (int i=0;i<4;i++)
945 		dataOut.m_floats[i] = float(m_floats[i]);
946 }
947 
deSerializeFloat(const struct btQuaternionFloatData & dataIn)948 SIMD_FORCE_INLINE void	btQuaternion::deSerializeFloat(const struct	btQuaternionFloatData& dataIn)
949 {
950 	for (int i=0;i<4;i++)
951 		m_floats[i] = btScalar(dataIn.m_floats[i]);
952 }
953 
954 
serializeDouble(struct btQuaternionDoubleData & dataOut)955 SIMD_FORCE_INLINE	void	btQuaternion::serializeDouble(struct	btQuaternionDoubleData& dataOut) const
956 {
957 	///could also do a memcpy, check if it is worth it
958 	for (int i=0;i<4;i++)
959 		dataOut.m_floats[i] = double(m_floats[i]);
960 }
961 
deSerializeDouble(const struct btQuaternionDoubleData & dataIn)962 SIMD_FORCE_INLINE void	btQuaternion::deSerializeDouble(const struct	btQuaternionDoubleData& dataIn)
963 {
964 	for (int i=0;i<4;i++)
965 		m_floats[i] = btScalar(dataIn.m_floats[i]);
966 }
967 
968 
serialize(struct btQuaternionData & dataOut)969 SIMD_FORCE_INLINE	void	btQuaternion::serialize(struct	btQuaternionData& dataOut) const
970 {
971 	///could also do a memcpy, check if it is worth it
972 	for (int i=0;i<4;i++)
973 		dataOut.m_floats[i] = m_floats[i];
974 }
975 
deSerialize(const struct btQuaternionData & dataIn)976 SIMD_FORCE_INLINE void	btQuaternion::deSerialize(const struct	btQuaternionData& dataIn)
977 {
978 	for (int i=0;i<4;i++)
979 		m_floats[i] = dataIn.m_floats[i];
980 }
981 
982 
983 #endif //BT_SIMD__QUATERNION_H_
984 
985 
986 
987