1 /*
2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
3
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14
15
16
17 #ifndef BT_SIMD__QUATERNION_H_
18 #define BT_SIMD__QUATERNION_H_
19
20
21 #include "btVector3.h"
22 #include "btQuadWord.h"
23
24
25 #ifdef BT_USE_DOUBLE_PRECISION
26 #define btQuaternionData btQuaternionDoubleData
27 #define btQuaternionDataName "btQuaternionDoubleData"
28 #else
29 #define btQuaternionData btQuaternionFloatData
30 #define btQuaternionDataName "btQuaternionFloatData"
31 #endif //BT_USE_DOUBLE_PRECISION
32
33
34
35 #ifdef BT_USE_SSE
36
37 //const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
38 #define vOnes (_mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f))
39
40 #endif
41
42 #if defined(BT_USE_SSE)
43
44 #define vQInv (_mm_set_ps(+0.0f, -0.0f, -0.0f, -0.0f))
45 #define vPPPM (_mm_set_ps(-0.0f, +0.0f, +0.0f, +0.0f))
46
47 #elif defined(BT_USE_NEON)
48
49 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
50 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
51
52 #endif
53
54 /**@brief The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform. */
55 class btQuaternion : public btQuadWord {
56 public:
57 /**@brief No initialization constructor */
btQuaternion()58 btQuaternion() {}
59
60 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))|| defined(BT_USE_NEON)
61 // Set Vector
btQuaternion(const btSimdFloat4 vec)62 SIMD_FORCE_INLINE btQuaternion(const btSimdFloat4 vec)
63 {
64 mVec128 = vec;
65 }
66
67 // Copy constructor
btQuaternion(const btQuaternion & rhs)68 SIMD_FORCE_INLINE btQuaternion(const btQuaternion& rhs)
69 {
70 mVec128 = rhs.mVec128;
71 }
72
73 // Assignment Operator
74 SIMD_FORCE_INLINE btQuaternion&
75 operator=(const btQuaternion& v)
76 {
77 mVec128 = v.mVec128;
78
79 return *this;
80 }
81
82 #endif
83
84 // template <typename btScalar>
85 // explicit Quaternion(const btScalar *v) : Tuple4<btScalar>(v) {}
86 /**@brief Constructor from scalars */
btQuaternion(const btScalar & _x,const btScalar & _y,const btScalar & _z,const btScalar & _w)87 btQuaternion(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w)
88 : btQuadWord(_x, _y, _z, _w)
89 {}
90 /**@brief Axis angle Constructor
91 * @param axis The axis which the rotation is around
92 * @param angle The magnitude of the rotation around the angle (Radians) */
btQuaternion(const btVector3 & _axis,const btScalar & _angle)93 btQuaternion(const btVector3& _axis, const btScalar& _angle)
94 {
95 setRotation(_axis, _angle);
96 }
97 /**@brief Constructor from Euler angles
98 * @param yaw Angle around Y unless BT_EULER_DEFAULT_ZYX defined then Z
99 * @param pitch Angle around X unless BT_EULER_DEFAULT_ZYX defined then Y
100 * @param roll Angle around Z unless BT_EULER_DEFAULT_ZYX defined then X */
btQuaternion(const btScalar & yaw,const btScalar & pitch,const btScalar & roll)101 btQuaternion(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
102 {
103 #ifndef BT_EULER_DEFAULT_ZYX
104 setEuler(yaw, pitch, roll);
105 #else
106 setEulerZYX(yaw, pitch, roll);
107 #endif
108 }
109 /**@brief Set the rotation using axis angle notation
110 * @param axis The axis around which to rotate
111 * @param angle The magnitude of the rotation in Radians */
setRotation(const btVector3 & axis,const btScalar & _angle)112 void setRotation(const btVector3& axis, const btScalar& _angle)
113 {
114 btScalar d = axis.length();
115 btAssert(d != btScalar(0.0));
116 btScalar s = btSin(_angle * btScalar(0.5)) / d;
117 setValue(axis.x() * s, axis.y() * s, axis.z() * s,
118 btCos(_angle * btScalar(0.5)));
119 }
120 /**@brief Set the quaternion using Euler angles
121 * @param yaw Angle around Y
122 * @param pitch Angle around X
123 * @param roll Angle around Z */
setEuler(const btScalar & yaw,const btScalar & pitch,const btScalar & roll)124 void setEuler(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
125 {
126 btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
127 btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
128 btScalar halfRoll = btScalar(roll) * btScalar(0.5);
129 btScalar cosYaw = btCos(halfYaw);
130 btScalar sinYaw = btSin(halfYaw);
131 btScalar cosPitch = btCos(halfPitch);
132 btScalar sinPitch = btSin(halfPitch);
133 btScalar cosRoll = btCos(halfRoll);
134 btScalar sinRoll = btSin(halfRoll);
135 setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
136 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
137 sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
138 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
139 }
140 /**@brief Set the quaternion using euler angles
141 * @param yaw Angle around Z
142 * @param pitch Angle around Y
143 * @param roll Angle around X */
setEulerZYX(const btScalar & yaw,const btScalar & pitch,const btScalar & roll)144 void setEulerZYX(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
145 {
146 btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
147 btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
148 btScalar halfRoll = btScalar(roll) * btScalar(0.5);
149 btScalar cosYaw = btCos(halfYaw);
150 btScalar sinYaw = btSin(halfYaw);
151 btScalar cosPitch = btCos(halfPitch);
152 btScalar sinPitch = btSin(halfPitch);
153 btScalar cosRoll = btCos(halfRoll);
154 btScalar sinRoll = btSin(halfRoll);
155 setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
156 cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
157 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
158 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
159 }
160 /**@brief Add two quaternions
161 * @param q The quaternion to add to this one */
162 SIMD_FORCE_INLINE btQuaternion& operator+=(const btQuaternion& q)
163 {
164 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
165 mVec128 = _mm_add_ps(mVec128, q.mVec128);
166 #elif defined(BT_USE_NEON)
167 mVec128 = vaddq_f32(mVec128, q.mVec128);
168 #else
169 m_floats[0] += q.x();
170 m_floats[1] += q.y();
171 m_floats[2] += q.z();
172 m_floats[3] += q.m_floats[3];
173 #endif
174 return *this;
175 }
176
177 /**@brief Subtract out a quaternion
178 * @param q The quaternion to subtract from this one */
179 btQuaternion& operator-=(const btQuaternion& q)
180 {
181 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
182 mVec128 = _mm_sub_ps(mVec128, q.mVec128);
183 #elif defined(BT_USE_NEON)
184 mVec128 = vsubq_f32(mVec128, q.mVec128);
185 #else
186 m_floats[0] -= q.x();
187 m_floats[1] -= q.y();
188 m_floats[2] -= q.z();
189 m_floats[3] -= q.m_floats[3];
190 #endif
191 return *this;
192 }
193
194 /**@brief Scale this quaternion
195 * @param s The scalar to scale by */
196 btQuaternion& operator*=(const btScalar& s)
197 {
198 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
199 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
200 vs = bt_pshufd_ps(vs, 0); // (S S S S)
201 mVec128 = _mm_mul_ps(mVec128, vs);
202 #elif defined(BT_USE_NEON)
203 mVec128 = vmulq_n_f32(mVec128, s);
204 #else
205 m_floats[0] *= s;
206 m_floats[1] *= s;
207 m_floats[2] *= s;
208 m_floats[3] *= s;
209 #endif
210 return *this;
211 }
212
213 /**@brief Multiply this quaternion by q on the right
214 * @param q The other quaternion
215 * Equivilant to this = this * q */
216 btQuaternion& operator*=(const btQuaternion& q)
217 {
218 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
219 __m128 vQ2 = q.get128();
220
221 __m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0,1,2,0));
222 __m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
223
224 A1 = A1 * B1;
225
226 __m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1,2,0,1));
227 __m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
228
229 A2 = A2 * B2;
230
231 B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2,0,1,2));
232 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
233
234 B1 = B1 * B2; // A3 *= B3
235
236 mVec128 = bt_splat_ps(mVec128, 3); // A0
237 mVec128 = mVec128 * vQ2; // A0 * B0
238
239 A1 = A1 + A2; // AB12
240 mVec128 = mVec128 - B1; // AB03 = AB0 - AB3
241 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
242 mVec128 = mVec128+ A1; // AB03 + AB12
243
244 #elif defined(BT_USE_NEON)
245
246 float32x4_t vQ1 = mVec128;
247 float32x4_t vQ2 = q.get128();
248 float32x4_t A0, A1, B1, A2, B2, A3, B3;
249 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
250
251 {
252 float32x2x2_t tmp;
253 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
254 vQ1zx = tmp.val[0];
255
256 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
257 vQ2zx = tmp.val[0];
258 }
259 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
260
261 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
262
263 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
264 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
265
266 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
267 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
268
269 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
270 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
271
272 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
273 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
274
275 A1 = vmulq_f32(A1, B1);
276 A2 = vmulq_f32(A2, B2);
277 A3 = vmulq_f32(A3, B3); // A3 *= B3
278 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
279
280 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
281 A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
282
283 // change the sign of the last element
284 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
285 A0 = vaddq_f32(A0, A1); // AB03 + AB12
286
287 mVec128 = A0;
288 #else
289 setValue(
290 m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(),
291 m_floats[3] * q.y() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.x() - m_floats[0] * q.z(),
292 m_floats[3] * q.z() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.y() - m_floats[1] * q.x(),
293 m_floats[3] * q.m_floats[3] - m_floats[0] * q.x() - m_floats[1] * q.y() - m_floats[2] * q.z());
294 #endif
295 return *this;
296 }
297 /**@brief Return the dot product between this quaternion and another
298 * @param q The other quaternion */
dot(const btQuaternion & q)299 btScalar dot(const btQuaternion& q) const
300 {
301 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
302 __m128 vd;
303
304 vd = _mm_mul_ps(mVec128, q.mVec128);
305
306 __m128 t = _mm_movehl_ps(vd, vd);
307 vd = _mm_add_ps(vd, t);
308 t = _mm_shuffle_ps(vd, vd, 0x55);
309 vd = _mm_add_ss(vd, t);
310
311 return _mm_cvtss_f32(vd);
312 #elif defined(BT_USE_NEON)
313 float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
314 float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
315 x = vpadd_f32(x, x);
316 return vget_lane_f32(x, 0);
317 #else
318 return m_floats[0] * q.x() +
319 m_floats[1] * q.y() +
320 m_floats[2] * q.z() +
321 m_floats[3] * q.m_floats[3];
322 #endif
323 }
324
325 /**@brief Return the length squared of the quaternion */
length2()326 btScalar length2() const
327 {
328 return dot(*this);
329 }
330
331 /**@brief Return the length of the quaternion */
length()332 btScalar length() const
333 {
334 return btSqrt(length2());
335 }
336
337 /**@brief Normalize the quaternion
338 * Such that x^2 + y^2 + z^2 +w^2 = 1 */
normalize()339 btQuaternion& normalize()
340 {
341 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
342 __m128 vd;
343
344 vd = _mm_mul_ps(mVec128, mVec128);
345
346 __m128 t = _mm_movehl_ps(vd, vd);
347 vd = _mm_add_ps(vd, t);
348 t = _mm_shuffle_ps(vd, vd, 0x55);
349 vd = _mm_add_ss(vd, t);
350
351 vd = _mm_sqrt_ss(vd);
352 vd = _mm_div_ss(vOnes, vd);
353 vd = bt_pshufd_ps(vd, 0); // splat
354 mVec128 = _mm_mul_ps(mVec128, vd);
355
356 return *this;
357 #else
358 return *this /= length();
359 #endif
360 }
361
362 /**@brief Return a scaled version of this quaternion
363 * @param s The scale factor */
364 SIMD_FORCE_INLINE btQuaternion
365 operator*(const btScalar& s) const
366 {
367 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
368 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
369 vs = bt_pshufd_ps(vs, 0x00); // (S S S S)
370
371 return btQuaternion(_mm_mul_ps(mVec128, vs));
372 #elif defined(BT_USE_NEON)
373 return btQuaternion(vmulq_n_f32(mVec128, s));
374 #else
375 return btQuaternion(x() * s, y() * s, z() * s, m_floats[3] * s);
376 #endif
377 }
378
379 /**@brief Return an inversely scaled versionof this quaternion
380 * @param s The inverse scale factor */
381 btQuaternion operator/(const btScalar& s) const
382 {
383 btAssert(s != btScalar(0.0));
384 return *this * (btScalar(1.0) / s);
385 }
386
387 /**@brief Inversely scale this quaternion
388 * @param s The scale factor */
389 btQuaternion& operator/=(const btScalar& s)
390 {
391 btAssert(s != btScalar(0.0));
392 return *this *= btScalar(1.0) / s;
393 }
394
395 /**@brief Return a normalized version of this quaternion */
normalized()396 btQuaternion normalized() const
397 {
398 return *this / length();
399 }
400 /**@brief Return the ***half*** angle between this quaternion and the other
401 * @param q The other quaternion */
angle(const btQuaternion & q)402 btScalar angle(const btQuaternion& q) const
403 {
404 btScalar s = btSqrt(length2() * q.length2());
405 btAssert(s != btScalar(0.0));
406 return btAcos(dot(q) / s);
407 }
408
409 /**@brief Return the angle between this quaternion and the other along the shortest path
410 * @param q The other quaternion */
angleShortestPath(const btQuaternion & q)411 btScalar angleShortestPath(const btQuaternion& q) const
412 {
413 btScalar s = btSqrt(length2() * q.length2());
414 btAssert(s != btScalar(0.0));
415 if (dot(q) < 0) // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
416 return btAcos(dot(-q) / s) * btScalar(2.0);
417 else
418 return btAcos(dot(q) / s) * btScalar(2.0);
419 }
420
421 /**@brief Return the angle [0, 2Pi] of rotation represented by this quaternion */
getAngle()422 btScalar getAngle() const
423 {
424 btScalar s = btScalar(2.) * btAcos(m_floats[3]);
425 return s;
426 }
427
428 /**@brief Return the angle [0, Pi] of rotation represented by this quaternion along the shortest path */
getAngleShortestPath()429 btScalar getAngleShortestPath() const
430 {
431 btScalar s;
432 if (m_floats[3] >= 0)
433 s = btScalar(2.) * btAcos(m_floats[3]);
434 else
435 s = btScalar(2.) * btAcos(-m_floats[3]);
436 return s;
437 }
438
439
440 /**@brief Return the axis of the rotation represented by this quaternion */
getAxis()441 btVector3 getAxis() const
442 {
443 btScalar s_squared = 1.f-m_floats[3]*m_floats[3];
444
445 if (s_squared < btScalar(10.) * SIMD_EPSILON) //Check for divide by zero
446 return btVector3(1.0, 0.0, 0.0); // Arbitrary
447 btScalar s = 1.f/btSqrt(s_squared);
448 return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
449 }
450
451 /**@brief Return the inverse of this quaternion */
inverse()452 btQuaternion inverse() const
453 {
454 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
455 return btQuaternion(_mm_xor_ps(mVec128, vQInv));
456 #elif defined(BT_USE_NEON)
457 return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
458 #else
459 return btQuaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
460 #endif
461 }
462
463 /**@brief Return the sum of this quaternion and the other
464 * @param q2 The other quaternion */
465 SIMD_FORCE_INLINE btQuaternion
466 operator+(const btQuaternion& q2) const
467 {
468 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
469 return btQuaternion(_mm_add_ps(mVec128, q2.mVec128));
470 #elif defined(BT_USE_NEON)
471 return btQuaternion(vaddq_f32(mVec128, q2.mVec128));
472 #else
473 const btQuaternion& q1 = *this;
474 return btQuaternion(q1.x() + q2.x(), q1.y() + q2.y(), q1.z() + q2.z(), q1.m_floats[3] + q2.m_floats[3]);
475 #endif
476 }
477
478 /**@brief Return the difference between this quaternion and the other
479 * @param q2 The other quaternion */
480 SIMD_FORCE_INLINE btQuaternion
481 operator-(const btQuaternion& q2) const
482 {
483 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
484 return btQuaternion(_mm_sub_ps(mVec128, q2.mVec128));
485 #elif defined(BT_USE_NEON)
486 return btQuaternion(vsubq_f32(mVec128, q2.mVec128));
487 #else
488 const btQuaternion& q1 = *this;
489 return btQuaternion(q1.x() - q2.x(), q1.y() - q2.y(), q1.z() - q2.z(), q1.m_floats[3] - q2.m_floats[3]);
490 #endif
491 }
492
493 /**@brief Return the negative of this quaternion
494 * This simply negates each element */
495 SIMD_FORCE_INLINE btQuaternion operator-() const
496 {
497 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
498 return btQuaternion(_mm_xor_ps(mVec128, btvMzeroMask));
499 #elif defined(BT_USE_NEON)
500 return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask) );
501 #else
502 const btQuaternion& q2 = *this;
503 return btQuaternion( - q2.x(), - q2.y(), - q2.z(), - q2.m_floats[3]);
504 #endif
505 }
506 /**@todo document this and it's use */
farthest(const btQuaternion & qd)507 SIMD_FORCE_INLINE btQuaternion farthest( const btQuaternion& qd) const
508 {
509 btQuaternion diff,sum;
510 diff = *this - qd;
511 sum = *this + qd;
512 if( diff.dot(diff) > sum.dot(sum) )
513 return qd;
514 return (-qd);
515 }
516
517 /**@todo document this and it's use */
nearest(const btQuaternion & qd)518 SIMD_FORCE_INLINE btQuaternion nearest( const btQuaternion& qd) const
519 {
520 btQuaternion diff,sum;
521 diff = *this - qd;
522 sum = *this + qd;
523 if( diff.dot(diff) < sum.dot(sum) )
524 return qd;
525 return (-qd);
526 }
527
528
529 /**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
530 * @param q The other quaternion to interpolate with
531 * @param t The ratio between this and q to interpolate. If t = 0 the result is this, if t=1 the result is q.
532 * Slerp interpolates assuming constant velocity. */
slerp(const btQuaternion & q,const btScalar & t)533 btQuaternion slerp(const btQuaternion& q, const btScalar& t) const
534 {
535
536 const btScalar magnitude = btSqrt(length2() * q.length2());
537 btAssert(magnitude > btScalar(0));
538
539 const btScalar product = dot(q) / magnitude;
540 const btScalar absproduct = btFabs(product);
541
542 if(absproduct < btScalar(1.0 - SIMD_EPSILON))
543 {
544 // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
545 const btScalar theta = btAcos(absproduct);
546 const btScalar d = btSin(theta);
547 btAssert(d > btScalar(0));
548
549 const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);
550 const btScalar s0 = btSin((btScalar(1.0) - t) * theta) / d;
551 const btScalar s1 = btSin(sign * t * theta) / d;
552
553 return btQuaternion(
554 (m_floats[0] * s0 + q.x() * s1),
555 (m_floats[1] * s0 + q.y() * s1),
556 (m_floats[2] * s0 + q.z() * s1),
557 (m_floats[3] * s0 + q.w() * s1));
558 }
559 else
560 {
561 return *this;
562 }
563 }
564
getIdentity()565 static const btQuaternion& getIdentity()
566 {
567 static const btQuaternion identityQuat(btScalar(0.),btScalar(0.),btScalar(0.),btScalar(1.));
568 return identityQuat;
569 }
570
getW()571 SIMD_FORCE_INLINE const btScalar& getW() const { return m_floats[3]; }
572
573 SIMD_FORCE_INLINE void serialize(struct btQuaternionData& dataOut) const;
574
575 SIMD_FORCE_INLINE void deSerialize(const struct btQuaternionData& dataIn);
576
577 SIMD_FORCE_INLINE void serializeFloat(struct btQuaternionFloatData& dataOut) const;
578
579 SIMD_FORCE_INLINE void deSerializeFloat(const struct btQuaternionFloatData& dataIn);
580
581 SIMD_FORCE_INLINE void serializeDouble(struct btQuaternionDoubleData& dataOut) const;
582
583 SIMD_FORCE_INLINE void deSerializeDouble(const struct btQuaternionDoubleData& dataIn);
584
585 };
586
587
588
589
590
591 /**@brief Return the product of two quaternions */
592 SIMD_FORCE_INLINE btQuaternion
593 operator*(const btQuaternion& q1, const btQuaternion& q2)
594 {
595 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
596 __m128 vQ1 = q1.get128();
597 __m128 vQ2 = q2.get128();
598 __m128 A0, A1, B1, A2, B2;
599
600 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y z x // vtrn
601 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W W X // vdup vext
602
603 A1 = A1 * B1;
604
605 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1)); // Y Z X Y // vext
606 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1)); // z x Y Y // vtrn vdup
607
608 A2 = A2 * B2;
609
610 B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2)); // z x Y Z // vtrn vext
611 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2)); // Y Z x z // vext vtrn
612
613 B1 = B1 * B2; // A3 *= B3
614
615 A0 = bt_splat_ps(vQ1, 3); // A0
616 A0 = A0 * vQ2; // A0 * B0
617
618 A1 = A1 + A2; // AB12
619 A0 = A0 - B1; // AB03 = AB0 - AB3
620
621 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
622 A0 = A0 + A1; // AB03 + AB12
623
624 return btQuaternion(A0);
625
626 #elif defined(BT_USE_NEON)
627
628 float32x4_t vQ1 = q1.get128();
629 float32x4_t vQ2 = q2.get128();
630 float32x4_t A0, A1, B1, A2, B2, A3, B3;
631 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
632
633 {
634 float32x2x2_t tmp;
635 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
636 vQ1zx = tmp.val[0];
637
638 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
639 vQ2zx = tmp.val[0];
640 }
641 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
642
643 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
644
645 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
646 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
647
648 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
649 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
650
651 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
652 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
653
654 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
655 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
656
657 A1 = vmulq_f32(A1, B1);
658 A2 = vmulq_f32(A2, B2);
659 A3 = vmulq_f32(A3, B3); // A3 *= B3
660 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
661
662 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
663 A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
664
665 // change the sign of the last element
666 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
667 A0 = vaddq_f32(A0, A1); // AB03 + AB12
668
669 return btQuaternion(A0);
670
671 #else
672 return btQuaternion(
673 q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
674 q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
675 q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
676 q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z());
677 #endif
678 }
679
680 SIMD_FORCE_INLINE btQuaternion
681 operator*(const btQuaternion& q, const btVector3& w)
682 {
683 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
684 __m128 vQ1 = q.get128();
685 __m128 vQ2 = w.get128();
686 __m128 A1, B1, A2, B2, A3, B3;
687
688 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3,3,3,0));
689 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0,1,2,0));
690
691 A1 = A1 * B1;
692
693 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
694 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
695
696 A2 = A2 * B2;
697
698 A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
699 B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
700
701 A3 = A3 * B3; // A3 *= B3
702
703 A1 = A1 + A2; // AB12
704 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
705 A1 = A1 - A3; // AB123 = AB12 - AB3
706
707 return btQuaternion(A1);
708
709 #elif defined(BT_USE_NEON)
710
711 float32x4_t vQ1 = q.get128();
712 float32x4_t vQ2 = w.get128();
713 float32x4_t A1, B1, A2, B2, A3, B3;
714 float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
715
716 vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
717 {
718 float32x2x2_t tmp;
719
720 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
721 vQ2zx = tmp.val[0];
722
723 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
724 vQ1zx = tmp.val[0];
725 }
726
727 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
728
729 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
730 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
731
732 A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W W X
733 B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx); // X Y z x
734
735 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
736 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
737
738 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
739 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
740
741 A1 = vmulq_f32(A1, B1);
742 A2 = vmulq_f32(A2, B2);
743 A3 = vmulq_f32(A3, B3); // A3 *= B3
744
745 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
746
747 // change the sign of the last element
748 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
749
750 A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
751
752 return btQuaternion(A1);
753
754 #else
755 return btQuaternion(
756 q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
757 q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
758 q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
759 -q.x() * w.x() - q.y() * w.y() - q.z() * w.z());
760 #endif
761 }
762
763 SIMD_FORCE_INLINE btQuaternion
764 operator*(const btVector3& w, const btQuaternion& q)
765 {
766 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
767 __m128 vQ1 = w.get128();
768 __m128 vQ2 = q.get128();
769 __m128 A1, B1, A2, B2, A3, B3;
770
771 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y z x
772 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W W X
773
774 A1 = A1 * B1;
775
776 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
777 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
778
779 A2 = A2 *B2;
780
781 A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
782 B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
783
784 A3 = A3 * B3; // A3 *= B3
785
786 A1 = A1 + A2; // AB12
787 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
788 A1 = A1 - A3; // AB123 = AB12 - AB3
789
790 return btQuaternion(A1);
791
792 #elif defined(BT_USE_NEON)
793
794 float32x4_t vQ1 = w.get128();
795 float32x4_t vQ2 = q.get128();
796 float32x4_t A1, B1, A2, B2, A3, B3;
797 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
798
799 {
800 float32x2x2_t tmp;
801
802 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
803 vQ1zx = tmp.val[0];
804
805 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
806 vQ2zx = tmp.val[0];
807 }
808 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
809
810 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
811
812 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
813 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
814
815 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
816 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
817
818 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
819 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
820
821 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
822 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
823
824 A1 = vmulq_f32(A1, B1);
825 A2 = vmulq_f32(A2, B2);
826 A3 = vmulq_f32(A3, B3); // A3 *= B3
827
828 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
829
830 // change the sign of the last element
831 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
832
833 A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
834
835 return btQuaternion(A1);
836
837 #else
838 return btQuaternion(
839 +w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
840 +w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
841 +w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
842 -w.x() * q.x() - w.y() * q.y() - w.z() * q.z());
843 #endif
844 }
845
846 /**@brief Calculate the dot product between two quaternions */
847 SIMD_FORCE_INLINE btScalar
dot(const btQuaternion & q1,const btQuaternion & q2)848 dot(const btQuaternion& q1, const btQuaternion& q2)
849 {
850 return q1.dot(q2);
851 }
852
853
854 /**@brief Return the length of a quaternion */
855 SIMD_FORCE_INLINE btScalar
length(const btQuaternion & q)856 length(const btQuaternion& q)
857 {
858 return q.length();
859 }
860
861 /**@brief Return the angle between two quaternions*/
862 SIMD_FORCE_INLINE btScalar
btAngle(const btQuaternion & q1,const btQuaternion & q2)863 btAngle(const btQuaternion& q1, const btQuaternion& q2)
864 {
865 return q1.angle(q2);
866 }
867
868 /**@brief Return the inverse of a quaternion*/
869 SIMD_FORCE_INLINE btQuaternion
inverse(const btQuaternion & q)870 inverse(const btQuaternion& q)
871 {
872 return q.inverse();
873 }
874
875 /**@brief Return the result of spherical linear interpolation betwen two quaternions
876 * @param q1 The first quaternion
877 * @param q2 The second quaternion
878 * @param t The ration between q1 and q2. t = 0 return q1, t=1 returns q2
879 * Slerp assumes constant velocity between positions. */
880 SIMD_FORCE_INLINE btQuaternion
slerp(const btQuaternion & q1,const btQuaternion & q2,const btScalar & t)881 slerp(const btQuaternion& q1, const btQuaternion& q2, const btScalar& t)
882 {
883 return q1.slerp(q2, t);
884 }
885
886 SIMD_FORCE_INLINE btVector3
quatRotate(const btQuaternion & rotation,const btVector3 & v)887 quatRotate(const btQuaternion& rotation, const btVector3& v)
888 {
889 btQuaternion q = rotation * v;
890 q *= rotation.inverse();
891 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
892 return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
893 #elif defined(BT_USE_NEON)
894 return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
895 #else
896 return btVector3(q.getX(),q.getY(),q.getZ());
897 #endif
898 }
899
900 SIMD_FORCE_INLINE btQuaternion
shortestArcQuat(const btVector3 & v0,const btVector3 & v1)901 shortestArcQuat(const btVector3& v0, const btVector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
902 {
903 btVector3 c = v0.cross(v1);
904 btScalar d = v0.dot(v1);
905
906 if (d < -1.0 + SIMD_EPSILON)
907 {
908 btVector3 n,unused;
909 btPlaneSpace1(v0,n,unused);
910 return btQuaternion(n.x(),n.y(),n.z(),0.0f); // just pick any vector that is orthogonal to v0
911 }
912
913 btScalar s = btSqrt((1.0f + d) * 2.0f);
914 btScalar rs = 1.0f / s;
915
916 return btQuaternion(c.getX()*rs,c.getY()*rs,c.getZ()*rs,s * 0.5f);
917 }
918
919 SIMD_FORCE_INLINE btQuaternion
shortestArcQuatNormalize2(btVector3 & v0,btVector3 & v1)920 shortestArcQuatNormalize2(btVector3& v0,btVector3& v1)
921 {
922 v0.normalize();
923 v1.normalize();
924 return shortestArcQuat(v0,v1);
925 }
926
927
928
929
930 struct btQuaternionFloatData
931 {
932 float m_floats[4];
933 };
934
935 struct btQuaternionDoubleData
936 {
937 double m_floats[4];
938
939 };
940
serializeFloat(struct btQuaternionFloatData & dataOut)941 SIMD_FORCE_INLINE void btQuaternion::serializeFloat(struct btQuaternionFloatData& dataOut) const
942 {
943 ///could also do a memcpy, check if it is worth it
944 for (int i=0;i<4;i++)
945 dataOut.m_floats[i] = float(m_floats[i]);
946 }
947
deSerializeFloat(const struct btQuaternionFloatData & dataIn)948 SIMD_FORCE_INLINE void btQuaternion::deSerializeFloat(const struct btQuaternionFloatData& dataIn)
949 {
950 for (int i=0;i<4;i++)
951 m_floats[i] = btScalar(dataIn.m_floats[i]);
952 }
953
954
serializeDouble(struct btQuaternionDoubleData & dataOut)955 SIMD_FORCE_INLINE void btQuaternion::serializeDouble(struct btQuaternionDoubleData& dataOut) const
956 {
957 ///could also do a memcpy, check if it is worth it
958 for (int i=0;i<4;i++)
959 dataOut.m_floats[i] = double(m_floats[i]);
960 }
961
deSerializeDouble(const struct btQuaternionDoubleData & dataIn)962 SIMD_FORCE_INLINE void btQuaternion::deSerializeDouble(const struct btQuaternionDoubleData& dataIn)
963 {
964 for (int i=0;i<4;i++)
965 m_floats[i] = btScalar(dataIn.m_floats[i]);
966 }
967
968
serialize(struct btQuaternionData & dataOut)969 SIMD_FORCE_INLINE void btQuaternion::serialize(struct btQuaternionData& dataOut) const
970 {
971 ///could also do a memcpy, check if it is worth it
972 for (int i=0;i<4;i++)
973 dataOut.m_floats[i] = m_floats[i];
974 }
975
deSerialize(const struct btQuaternionData & dataIn)976 SIMD_FORCE_INLINE void btQuaternion::deSerialize(const struct btQuaternionData& dataIn)
977 {
978 for (int i=0;i<4;i++)
979 m_floats[i] = dataIn.m_floats[i];
980 }
981
982
983 #endif //BT_SIMD__QUATERNION_H_
984
985
986
987