1 /*
2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
3
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14
15 // Modified by Yao Wei Tjong for Urho3D
16
17
18 #ifndef BT_VECTOR3_H
19 #define BT_VECTOR3_H
20
21 //#include <stdint.h>
22 #include "btScalar.h"
23 #include "btMinMax.h"
24 #include "btAlignedAllocator.h"
25
26 #ifdef BT_USE_DOUBLE_PRECISION
27 #define btVector3Data btVector3DoubleData
28 #define btVector3DataName "btVector3DoubleData"
29 #else
30 #define btVector3Data btVector3FloatData
31 #define btVector3DataName "btVector3FloatData"
32 #endif //BT_USE_DOUBLE_PRECISION
33
#if defined BT_USE_SSE

//typedef uint32_t __m128i __attribute__ ((vector_size(16)));

#ifdef _MSC_VER
#pragma warning(disable: 4556) // value of intrinsic immediate argument '4294967239' is out of range '0 - 255'
#endif


// Build an _MM_SHUFFLE-style 8-bit immediate selecting source lanes x,y,z,w (each 0-3).
#define BT_SHUFFLE(x,y,z,w) (((w)<<6 | (z)<<4 | (y)<<2 | (x)) & 0xff)
//#define bt_pshufd_ps( _a, _mask ) (__m128) _mm_shuffle_epi32((__m128i)(_a), (_mask) )
// Permute a single register against itself (pshufd-like behavior via shufps).
#define bt_pshufd_ps( _a, _mask ) _mm_shuffle_ps((_a), (_a), (_mask) )
// Broadcast lane _i into x,y,z; the w lane is taken from source lane 3.
#define bt_splat3_ps( _a, _i )  bt_pshufd_ps((_a), BT_SHUFFLE(_i,_i,_i, 3) )
// Broadcast lane _i into all four lanes.
#define bt_splat_ps( _a, _i )  bt_pshufd_ps((_a), BT_SHUFFLE(_i,_i,_i,_i) )

// Bit masks: the "Abs" masks clear sign bits; the "FFF0" masks keep x,y,z and zero w.
// Note _mm_set_epi32 takes arguments high-lane first, so the first argument is the w lane.
#define btv3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
#define btvAbsMask (_mm_set_epi32( 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
#define btvFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
#define btv3AbsfMask btCastiTo128f(btv3AbsiMask)
#define btvFFF0fMask btCastiTo128f(btvFFF0Mask)
#define btvxyzMaskf btvFFF0fMask
#define btvAbsfMask btCastiTo128f(btvAbsMask)

//there is an issue with XCode 3.2 (LCx errors)
// These float constants are macros rather than file-scope __m128 constants to work around it.
#define btvMzeroMask (_mm_set_ps(-0.0f, -0.0f, -0.0f, -0.0f))
#define v1110 (_mm_set_ps(0.0f, 1.0f, 1.0f, 1.0f))
#define vHalf (_mm_set_ps(0.5f, 0.5f, 0.5f, 0.5f))
#define v1_5 (_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f))

//const __m128 ATTRIBUTE_ALIGNED16(btvMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
//const __m128 ATTRIBUTE_ALIGNED16(v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
//const __m128 ATTRIBUTE_ALIGNED16(vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
//const __m128 ATTRIBUTE_ALIGNED16(v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};

#endif
69
#ifdef BT_USE_NEON

// Lane masks/constants used by the NEON code paths below:
// btvMzeroMask holds -0.0f in all lanes (sign-bit mask for XOR negation),
// btvFFF0Mask keeps x,y,z and zeroes w, and the "Abs" masks clear sign bits.
const float32x4_t ATTRIBUTE_ALIGNED16(btvMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
const int32x4_t ATTRIBUTE_ALIGNED16(btvFFF0Mask) = (int32x4_t){static_cast<int32_t>(0xFFFFFFFF),
	static_cast<int32_t>(0xFFFFFFFF), static_cast<int32_t>(0xFFFFFFFF), 0x0};
const int32x4_t ATTRIBUTE_ALIGNED16(btvAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
const int32x4_t ATTRIBUTE_ALIGNED16(btv3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};

#endif
79
/**@brief btVector3 can be used to represent 3D points and vectors.
 * It has an unused w component to suit 16-byte alignment when btVector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by the user.
 * Ideally, this class should be replaced by a platform-optimized SIMD version that keeps the data in registers.
 */
ATTRIBUTE_ALIGNED16(class) btVector3
{
public:

	BT_DECLARE_ALIGNED_ALLOCATOR();

#if defined (__SPU__) && defined (__CELLOS_LV2__)
	btScalar	m_floats[4];
public:
	SIMD_FORCE_INLINE const vec_float4&	get128() const
	{
		return *((const vec_float4*)&m_floats[0]);
	}
public:
#else //__CELLOS_LV2__ __SPU__
#if defined (BT_USE_SSE) || defined(BT_USE_NEON) // _WIN32 || ARM
	// The SIMD register and the scalar array alias the same 16 bytes,
	// so scalar and vector code paths can freely interleave.
	union {
		btSimdFloat4 mVec128;
		btScalar m_floats[4];
	};
	SIMD_FORCE_INLINE btSimdFloat4 get128() const
	{
		return mVec128;
	}
	SIMD_FORCE_INLINE void set128(btSimdFloat4 v128)
	{
		mVec128 = v128;
	}
#else
	btScalar m_floats[4];
#endif
#endif //__CELLOS_LV2__ __SPU__

public:

	/**@brief No initialization constructor; all components are left indeterminate. */
	SIMD_FORCE_INLINE btVector3()
	{

	}



	/**@brief Constructor from scalars
	 * @param _x X value
	 * @param _y Y value
	 * @param _z Z value
	 * The unused w component is zeroed. */
	SIMD_FORCE_INLINE btVector3(const btScalar& _x, const btScalar& _y, const btScalar& _z)
	{
		m_floats[0] = _x;
		m_floats[1] = _y;
		m_floats[2] = _z;
		m_floats[3] = btScalar(0.f);
	}

#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) )|| defined (BT_USE_NEON)
	// Set Vector: construct directly from a SIMD register (all four lanes copied).
	SIMD_FORCE_INLINE btVector3( btSimdFloat4 v)
	{
		mVec128 = v;
	}

	// Copy constructor
	SIMD_FORCE_INLINE btVector3(const btVector3& rhs)
	{
		mVec128 = rhs.mVec128;
	}

	// Assignment Operator
	SIMD_FORCE_INLINE btVector3&
	operator=(const btVector3& v)
	{
		mVec128 = v.mVec128;

		return *this;
	}
#endif // #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)

	/**@brief Add a vector to this one
	 * @param v The vector to add to this one */
	SIMD_FORCE_INLINE btVector3& operator+=(const btVector3& v)
	{
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		mVec128 = _mm_add_ps(mVec128, v.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vaddq_f32(mVec128, v.mVec128);
#else
		m_floats[0] += v.m_floats[0];
		m_floats[1] += v.m_floats[1];
		m_floats[2] += v.m_floats[2];
#endif
		return *this;
	}


	/**@brief Subtract a vector from this one
	 * @param v The vector to subtract */
	SIMD_FORCE_INLINE btVector3& operator-=(const btVector3& v)
	{
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		mVec128 = _mm_sub_ps(mVec128, v.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vsubq_f32(mVec128, v.mVec128);
#else
		m_floats[0] -= v.m_floats[0];
		m_floats[1] -= v.m_floats[1];
		m_floats[2] -= v.m_floats[2];
#endif
		return *this;
	}

	/**@brief Scale the vector
	 * @param s Scale factor */
	SIMD_FORCE_INLINE btVector3& operator*=(const btScalar& s)
	{
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		__m128 vs = _mm_load_ss(&s);	// (S 0 0 0)
		vs = bt_pshufd_ps(vs, 0x80);	// (S S S 0.0) -- w lane kept at zero
		mVec128 = _mm_mul_ps(mVec128, vs);
#elif defined(BT_USE_NEON)
		mVec128 = vmulq_n_f32(mVec128, s);
#else
		m_floats[0] *= s;
		m_floats[1] *= s;
		m_floats[2] *= s;
#endif
		return *this;
	}

	/**@brief Inversely scale the vector
	 * @param s Scale factor to divide by (must be non-zero) */
	SIMD_FORCE_INLINE btVector3& operator/=(const btScalar& s)
	{
		btFullAssert(s != btScalar(0.0));

#if 0 //defined(BT_USE_SSE_IN_API)
// this code is not faster !
		__m128 vs = _mm_load_ss(&s);
		vs = _mm_div_ss(v1110, vs);
		vs = bt_pshufd_ps(vs, 0x00);	// (S S S S)

		mVec128 = _mm_mul_ps(mVec128, vs);

		return *this;
#else
		// Multiply by the reciprocal; one divide instead of three.
		return *this *= btScalar(1.0) / s;
#endif
	}

	/**@brief Return the dot product (x, y, z only; w is ignored)
	 * @param v The other vector in the dot product */
	SIMD_FORCE_INLINE btScalar dot(const btVector3& v) const
	{
#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		__m128 vd = _mm_mul_ps(mVec128, v.mVec128);
		__m128 z = _mm_movehl_ps(vd, vd);        // bring z lane down to lane 0
		__m128 y = _mm_shuffle_ps(vd, vd, 0x55); // broadcast y lane
		vd = _mm_add_ss(vd, y);
		vd = _mm_add_ss(vd, z);
		return _mm_cvtss_f32(vd);
#elif defined(BT_USE_NEON)
		float32x4_t vd = vmulq_f32(mVec128, v.mVec128);
		float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd)); // x+y in both lanes
		x = vadd_f32(x, vget_high_f32(vd));                            // lane 0 = x+y+z
		return vget_lane_f32(x, 0);
#else
		return	m_floats[0] * v.m_floats[0] +
				m_floats[1] * v.m_floats[1] +
				m_floats[2] * v.m_floats[2];
#endif
	}

	/**@brief Return the length of the vector squared */
	SIMD_FORCE_INLINE btScalar length2() const
	{
		return dot(*this);
	}

	/**@brief Return the length of the vector */
	SIMD_FORCE_INLINE btScalar length() const
	{
		return btSqrt(length2());
	}

	/**@brief Return the norm (length) of the vector */
	SIMD_FORCE_INLINE btScalar norm() const
	{
		return length();
	}

	/**@brief Return the norm (length) of the vector, or 0 for near-zero vectors */
	SIMD_FORCE_INLINE btScalar safeNorm() const
	{
		btScalar d = length2();
		//workaround for some clang/gcc issue of sqrtf(tiny number) = -INF
		if (d>SIMD_EPSILON)
			return btSqrt(d);
		return btScalar(0);
	}

	/**@brief Return the distance squared between the ends of this and another vector
	 * This is semantically treating the vector like a point */
	SIMD_FORCE_INLINE btScalar distance2(const btVector3& v) const;

	/**@brief Return the distance between the ends of this and another vector
	 * This is semantically treating the vector like a point */
	SIMD_FORCE_INLINE btScalar distance(const btVector3& v) const;

	/**@brief Normalize this vector in place; near-zero vectors become (1,0,0) instead of dividing by ~0. */
	SIMD_FORCE_INLINE btVector3& safeNormalize()
	{
		btScalar l2 = length2();
		//triNormal.normalize();
		if (l2 >= SIMD_EPSILON*SIMD_EPSILON)
		{
			(*this) /= btSqrt(l2);
		}
		else
		{
			setValue(1, 0, 0);
		}
		return *this;
	}

	/**@brief Normalize this vector
	 * x^2 + y^2 + z^2 = 1
	 * The vector must not be (fuzzy) zero; use safeNormalize() for that case. */
	SIMD_FORCE_INLINE btVector3& normalize()
	{

		btAssert(!fuzzyZero());

#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		// dot product first
		__m128 vd = _mm_mul_ps(mVec128, mVec128);
		__m128 z = _mm_movehl_ps(vd, vd);
		__m128 y = _mm_shuffle_ps(vd, vd, 0x55);
		vd = _mm_add_ss(vd, y);
		vd = _mm_add_ss(vd, z);

#if 0
		vd = _mm_sqrt_ss(vd);
		vd = _mm_div_ss(v1110, vd);
		vd = bt_splat_ps(vd, 0x80);
		mVec128 = _mm_mul_ps(mVec128, vd);
#else

		// NR step 1/sqrt(x) - vd is x, y is output
		y = _mm_rsqrt_ss(vd);	// estimate

		// one step of Newton-Raphson refinement: y1 = y0 * (1.5 - 0.5*x*y0*y0)
		z = v1_5;
		vd = _mm_mul_ss(vd, vHalf);	// vd * 0.5
		//x2 = vd;
		vd = _mm_mul_ss(vd, y);	// vd * 0.5 * y0
		vd = _mm_mul_ss(vd, y);	// vd * 0.5 * y0 * y0
		z = _mm_sub_ss(z, vd);	// 1.5 - vd * 0.5 * y0 * y0

		y = _mm_mul_ss(y, z);	// y0 * (1.5 - vd * 0.5 * y0 * y0)

		y = bt_splat_ps(y, 0x80);
		mVec128 = _mm_mul_ps(mVec128, y);

#endif


		return *this;
#else
		return *this /= length();
#endif
	}

	/**@brief Return a normalized version of this vector */
	SIMD_FORCE_INLINE btVector3 normalized() const;

	/**@brief Return a rotated version of this vector
	 * @param wAxis The axis to rotate about
	 * @param angle The angle to rotate by */
	SIMD_FORCE_INLINE btVector3 rotate( const btVector3& wAxis, const btScalar angle ) const;

	/**@brief Return the angle between this and another vector
	 * @param v The other vector (neither vector may be zero-length) */
	SIMD_FORCE_INLINE btScalar angle(const btVector3& v) const
	{
		btScalar s = btSqrt(length2() * v.length2());
		btFullAssert(s != btScalar(0.0));
		return btAcos(dot(v) / s);
	}

	/**@brief Return a vector with the absolute values of each element */
	SIMD_FORCE_INLINE btVector3 absolute() const
	{

#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		return btVector3(_mm_and_ps(mVec128, btv3AbsfMask));
#elif defined(BT_USE_NEON)
		return btVector3(vabsq_f32(mVec128));
#else
		return btVector3(
				btFabs(m_floats[0]),
				btFabs(m_floats[1]),
				btFabs(m_floats[2]));
#endif
	}

	/**@brief Return the cross product between this and another vector
	 * @param v The other vector */
	SIMD_FORCE_INLINE btVector3 cross(const btVector3& v) const
	{
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		__m128	T, V;

		T = bt_pshufd_ps(mVec128, BT_SHUFFLE(1, 2, 0, 3));	// (Y Z X 0)
		V = bt_pshufd_ps(v.mVec128, BT_SHUFFLE(1, 2, 0, 3));	// (Y Z X 0)

		V = _mm_mul_ps(V, mVec128);
		T = _mm_mul_ps(T, v.mVec128);
		V = _mm_sub_ps(V, T);

		// one final rotate left puts the lanes back into (x y z) order
		V = bt_pshufd_ps(V, BT_SHUFFLE(1, 2, 0, 3));
		return btVector3(V);
#elif defined(BT_USE_NEON)
		float32x4_t T, V;
		// form (Y, Z, X, _) of mVec128 and v.mVec128
		float32x2_t Tlow = vget_low_f32(mVec128);
		float32x2_t Vlow = vget_low_f32(v.mVec128);
		T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow);
		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow);

		V = vmulq_f32(V, mVec128);
		T = vmulq_f32(T, v.mVec128);
		V = vsubq_f32(V, T);
		Vlow = vget_low_f32(V);
		// form (Y, Z, X, _);
		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
		V = (float32x4_t)vandq_s32((int32x4_t)V, btvFFF0Mask);	// zero the w lane

		return btVector3(V);
#else
		return btVector3(
			m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1],
			m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2],
			m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]);
#endif
	}

	/**@brief Scalar triple product: this . (v1 x v2) */
	SIMD_FORCE_INLINE btScalar triple(const btVector3& v1, const btVector3& v2) const
	{
#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		// cross:
		__m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, BT_SHUFFLE(1, 2, 0, 3));	// (Y Z X 0)
		__m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, BT_SHUFFLE(1, 2, 0, 3));	// (Y Z X 0)

		V = _mm_mul_ps(V, v1.mVec128);
		T = _mm_mul_ps(T, v2.mVec128);
		V = _mm_sub_ps(V, T);

		V = _mm_shuffle_ps(V, V, BT_SHUFFLE(1, 2, 0, 3));

		// dot:
		V = _mm_mul_ps(V, mVec128);
		__m128 z = _mm_movehl_ps(V, V);
		__m128 y = _mm_shuffle_ps(V, V, 0x55);
		V = _mm_add_ss(V, y);
		V = _mm_add_ss(V, z);
		return _mm_cvtss_f32(V);

#elif defined(BT_USE_NEON)
		// cross:
		float32x4_t T, V;
		// form (Y, Z, X, _) of mVec128 and v.mVec128
		float32x2_t Tlow = vget_low_f32(v1.mVec128);
		float32x2_t Vlow = vget_low_f32(v2.mVec128);
		T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow);
		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow);

		V = vmulq_f32(V, v1.mVec128);
		T = vmulq_f32(T, v2.mVec128);
		V = vsubq_f32(V, T);
		Vlow = vget_low_f32(V);
		// form (Y, Z, X, _);
		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);

		// dot:
		V = vmulq_f32(mVec128, V);
		float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V));
		x = vadd_f32(x, vget_high_f32(V));
		return vget_lane_f32(x, 0);
#else
		return
			m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) +
			m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) +
			m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
#endif
	}

	/**@brief Return the axis with the smallest value
	 * Note return values are 0,1,2 for x, y, or z */
	SIMD_FORCE_INLINE int minAxis() const
	{
		return m_floats[0] < m_floats[1] ? (m_floats[0] < m_floats[2] ? 0 : 2) : (m_floats[1] < m_floats[2] ? 1 : 2);
	}

	/**@brief Return the axis with the largest value
	 * Note return values are 0,1,2 for x, y, or z */
	SIMD_FORCE_INLINE int maxAxis() const
	{
		return m_floats[0] < m_floats[1] ? (m_floats[1] < m_floats[2] ? 2 : 1) : (m_floats[0] < m_floats[2] ? 2 : 0);
	}

	// Axis with the smallest absolute value (0,1,2 for x,y,z).
	SIMD_FORCE_INLINE int furthestAxis() const
	{
		return absolute().minAxis();
	}

	// Axis with the largest absolute value (0,1,2 for x,y,z).
	SIMD_FORCE_INLINE int closestAxis() const
	{
		return absolute().maxAxis();
	}


	/**@brief Set this vector to the linear interpolation (1-rt)*v0 + rt*v1. */
	SIMD_FORCE_INLINE void setInterpolate3(const btVector3& v0, const btVector3& v1, btScalar rt)
	{
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		__m128 vrt = _mm_load_ss(&rt);	// (rt 0 0 0)
		btScalar s = btScalar(1.0) - rt;
		__m128 vs = _mm_load_ss(&s);	// (S 0 0 0)
		vs = bt_pshufd_ps(vs, 0x80);	// (S S S 0.0)
		__m128 r0 = _mm_mul_ps(v0.mVec128, vs);
		vrt = bt_pshufd_ps(vrt, 0x80);	// (rt rt rt 0.0)
		__m128 r1 = _mm_mul_ps(v1.mVec128, vrt);
		__m128 tmp3 = _mm_add_ps(r0,r1);
		mVec128 = tmp3;
#elif defined(BT_USE_NEON)
		float32x4_t vl = vsubq_f32(v1.mVec128, v0.mVec128);
		vl = vmulq_n_f32(vl, rt);
		mVec128 = vaddq_f32(vl, v0.mVec128);
#else
		btScalar s = btScalar(1.0) - rt;
		m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0];
		m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1];
		m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2];
		//don't do the unused w component
		//		m_co[3] = s * v0[3] + rt * v1[3];
#endif
	}

	/**@brief Return the linear interpolation between this and another vector
	 * @param v The other vector
	 * @param t The ratio of this to v (t = 0 => return this, t=1 => return other) */
	SIMD_FORCE_INLINE btVector3 lerp(const btVector3& v, const btScalar& t) const
	{
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		__m128 vt = _mm_load_ss(&t);	// (t 0 0 0)
		vt = bt_pshufd_ps(vt, 0x80);	// (t t t 0.0)
		__m128 vl = _mm_sub_ps(v.mVec128, mVec128);
		vl = _mm_mul_ps(vl, vt);
		vl = _mm_add_ps(vl, mVec128);

		return btVector3(vl);
#elif defined(BT_USE_NEON)
		float32x4_t vl = vsubq_f32(v.mVec128, mVec128);
		vl = vmulq_n_f32(vl, t);
		vl = vaddq_f32(vl, mVec128);

		return btVector3(vl);
#else
		return
			btVector3(	m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
						m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
						m_floats[2] + (v.m_floats[2] - m_floats[2]) * t);
#endif
	}

	/**@brief Elementwise multiply this vector by the other
	 * @param v The other vector */
	SIMD_FORCE_INLINE btVector3& operator*=(const btVector3& v)
	{
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		mVec128 = _mm_mul_ps(mVec128, v.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vmulq_f32(mVec128, v.mVec128);
#else
		m_floats[0] *= v.m_floats[0];
		m_floats[1] *= v.m_floats[1];
		m_floats[2] *= v.m_floats[2];
#endif
		return *this;
	}

	/**@brief Return the x value */
	SIMD_FORCE_INLINE const btScalar& getX() const { return m_floats[0]; }
	/**@brief Return the y value */
	SIMD_FORCE_INLINE const btScalar& getY() const { return m_floats[1]; }
	/**@brief Return the z value */
	SIMD_FORCE_INLINE const btScalar& getZ() const { return m_floats[2]; }
	/**@brief Set the x value */
	SIMD_FORCE_INLINE void	setX(btScalar _x) { m_floats[0] = _x;};
	/**@brief Set the y value */
	SIMD_FORCE_INLINE void	setY(btScalar _y) { m_floats[1] = _y;};
	/**@brief Set the z value */
	SIMD_FORCE_INLINE void	setZ(btScalar _z) { m_floats[2] = _z;};
	/**@brief Set the w value */
	SIMD_FORCE_INLINE void	setW(btScalar _w) { m_floats[3] = _w;};
	/**@brief Return the x value */
	SIMD_FORCE_INLINE const btScalar& x() const { return m_floats[0]; }
	/**@brief Return the y value */
	SIMD_FORCE_INLINE const btScalar& y() const { return m_floats[1]; }
	/**@brief Return the z value */
	SIMD_FORCE_INLINE const btScalar& z() const { return m_floats[2]; }
	/**@brief Return the w value */
	SIMD_FORCE_INLINE const btScalar& w() const { return m_floats[3]; }

	//SIMD_FORCE_INLINE btScalar&       operator[](int i)       { return (&m_floats[0])[i];	}
	//SIMD_FORCE_INLINE const btScalar& operator[](int i) const { return (&m_floats[0])[i]; }
	///operator btScalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons.
	SIMD_FORCE_INLINE operator       btScalar *()       { return &m_floats[0]; }
	SIMD_FORCE_INLINE operator const btScalar *() const { return &m_floats[0]; }

	// Exact (bitwise-style) floating-point comparison including the w component.
	SIMD_FORCE_INLINE bool operator==(const btVector3& other) const
	{
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
#else
		return ((m_floats[3]==other.m_floats[3]) &&
				(m_floats[2]==other.m_floats[2]) &&
				(m_floats[1]==other.m_floats[1]) &&
				(m_floats[0]==other.m_floats[0]));
#endif
	}

	SIMD_FORCE_INLINE bool operator!=(const btVector3& other) const
	{
		return !(*this == other);
	}

	/**@brief Set each element to the max of the current values and the values of another btVector3
	 * @param other The other btVector3 to compare with
	 * Note: the w component participates in the comparison as well.
	 */
	SIMD_FORCE_INLINE void setMax(const btVector3& other)
	{
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		mVec128 = _mm_max_ps(mVec128, other.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vmaxq_f32(mVec128, other.mVec128);
#else
		btSetMax(m_floats[0], other.m_floats[0]);
		btSetMax(m_floats[1], other.m_floats[1]);
		btSetMax(m_floats[2], other.m_floats[2]);
		btSetMax(m_floats[3], other.w());
#endif
	}

	/**@brief Set each element to the min of the current values and the values of another btVector3
	 * @param other The other btVector3 to compare with
	 * Note: the w component participates in the comparison as well.
	 */
	SIMD_FORCE_INLINE void setMin(const btVector3& other)
	{
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		mVec128 = _mm_min_ps(mVec128, other.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vminq_f32(mVec128, other.mVec128);
#else
		btSetMin(m_floats[0], other.m_floats[0]);
		btSetMin(m_floats[1], other.m_floats[1]);
		btSetMin(m_floats[2], other.m_floats[2]);
		btSetMin(m_floats[3], other.w());
#endif
	}

	// Set x,y,z and zero the w component.
	SIMD_FORCE_INLINE void setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z)
	{
		m_floats[0]=_x;
		m_floats[1]=_y;
		m_floats[2]=_z;
		m_floats[3] = btScalar(0.f);
	}

	// Write the skew-symmetric cross-product matrix of this vector into the three row vectors:
	// (v0 v1 v2) such that (this x u) == M*u.
	void	getSkewSymmetricMatrix(btVector3* v0,btVector3* v1,btVector3* v2) const
	{
#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)

		__m128 V  = _mm_and_ps(mVec128, btvFFF0fMask);
		__m128 V0 = _mm_xor_ps(btvMzeroMask, V);
		__m128 V2 = _mm_movelh_ps(V0, V);

		__m128 V1 = _mm_shuffle_ps(V, V0, 0xCE);

		V0 = _mm_shuffle_ps(V0, V, 0xDB);
		V2 = _mm_shuffle_ps(V2, V, 0xF9);

		v0->mVec128 = V0;
		v1->mVec128 = V1;
		v2->mVec128 = V2;
#else
		v0->setValue(0.   ,-z() ,y());
		v1->setValue(z()  ,0.   ,-x());
		v2->setValue(-y() ,x()  ,0.);
#endif
	}

	// Zero all four components.
	void	setZero()
	{
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128);
#elif defined(BT_USE_NEON)
		int32x4_t vi = vdupq_n_s32(0);
		mVec128 = vreinterpretq_f32_s32(vi);
#else
		setValue(btScalar(0.),btScalar(0.),btScalar(0.));
#endif
	}

	// Exact zero test on x, y, z (w is ignored).
	SIMD_FORCE_INLINE bool isZero() const
	{
		return m_floats[0] == btScalar(0) && m_floats[1] == btScalar(0) && m_floats[2] == btScalar(0);
	}


	// True when the squared length is below SIMD_EPSILON^2.
	SIMD_FORCE_INLINE bool fuzzyZero() const
	{
		return length2() < SIMD_EPSILON*SIMD_EPSILON;
	}

	SIMD_FORCE_INLINE	void	serialize(struct	btVector3Data& dataOut) const;

	SIMD_FORCE_INLINE	void	deSerialize(const struct	btVector3Data& dataIn);

	SIMD_FORCE_INLINE	void	serializeFloat(struct	btVector3FloatData& dataOut) const;

	SIMD_FORCE_INLINE	void	deSerializeFloat(const struct	btVector3FloatData& dataIn);

	SIMD_FORCE_INLINE	void	serializeDouble(struct	btVector3DoubleData& dataOut) const;

	SIMD_FORCE_INLINE	void	deSerializeDouble(const struct	btVector3DoubleData& dataIn);

	/**@brief returns index of maximum dot product between this and vectors in array[]
	 * @param array The other vectors
	 * @param array_count The number of other vectors
	 * @param dotOut The maximum dot product */
	SIMD_FORCE_INLINE	long	maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const;

	/**@brief returns index of minimum dot product between this and vectors in array[]
	 * @param array The other vectors
	 * @param array_count The number of other vectors
	 * @param dotOut The minimum dot product */
	SIMD_FORCE_INLINE	long	minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const;

	/* create a vector as  btVector3( this->dot( btVector3 v0 ), this->dot( btVector3 v1), this->dot( btVector3 v2 ))  */
	SIMD_FORCE_INLINE	btVector3  dot3( const btVector3 &v0, const btVector3 &v1, const btVector3 &v2 ) const
	{
#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)

		__m128 a0 = _mm_mul_ps( v0.mVec128, this->mVec128 );
		__m128 a1 = _mm_mul_ps( v1.mVec128, this->mVec128 );
		__m128 a2 = _mm_mul_ps( v2.mVec128, this->mVec128 );
		__m128 b0 = _mm_unpacklo_ps( a0, a1 );
		__m128 b1 = _mm_unpackhi_ps( a0, a1 );
		__m128 b2 = _mm_unpacklo_ps( a2, _mm_setzero_ps() );
		__m128 r  = _mm_movelh_ps( b0, b2 );
		r = _mm_add_ps( r, _mm_movehl_ps( b2, b0 ));
		a2 = _mm_and_ps( a2, btvxyzMaskf);
		r = _mm_add_ps( r, btCastdTo128f (_mm_move_sd( btCastfTo128d(a2), btCastfTo128d(b1) )));
		return btVector3(r);

#elif defined(BT_USE_NEON)
		static const uint32x4_t xyzMask = (const uint32x4_t){ static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0 };
		float32x4_t a0 = vmulq_f32( v0.mVec128, this->mVec128);
		float32x4_t a1 = vmulq_f32( v1.mVec128, this->mVec128);
		float32x4_t a2 = vmulq_f32( v2.mVec128, this->mVec128);
		float32x2x2_t zLo = vtrn_f32( vget_high_f32(a0), vget_high_f32(a1));
		a2 = (float32x4_t) vandq_u32((uint32x4_t) a2, xyzMask );
		float32x2_t b0 = vadd_f32( vpadd_f32( vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0] );
		float32x2_t b1 = vpadd_f32( vpadd_f32( vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f));
		return btVector3( vcombine_f32(b0, b1) );
#else
		return btVector3( dot(v0), dot(v1), dot(v2));
#endif
	}
};
763
/**@brief Return the sum of two vectors (Point semantics) */
SIMD_FORCE_INLINE btVector3
operator+(const btVector3& v1, const btVector3& v2)
{
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
	return btVector3(_mm_add_ps(v1.mVec128, v2.mVec128));
#elif defined(BT_USE_NEON)
	return btVector3(vaddq_f32(v1.mVec128, v2.mVec128));
#else
	return btVector3(
			v1.m_floats[0] + v2.m_floats[0],
			v1.m_floats[1] + v2.m_floats[1],
			v1.m_floats[2] + v2.m_floats[2]);
#endif
}
779
/**@brief Return the elementwise product of two vectors */
SIMD_FORCE_INLINE btVector3
operator*(const btVector3& v1, const btVector3& v2)
{
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
	return btVector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
#elif defined(BT_USE_NEON)
	return btVector3(vmulq_f32(v1.mVec128, v2.mVec128));
#else
	return btVector3(
			v1.m_floats[0] * v2.m_floats[0],
			v1.m_floats[1] * v2.m_floats[1],
			v1.m_floats[2] * v2.m_floats[2]);
#endif
}
795
/**@brief Return the difference between two vectors */
SIMD_FORCE_INLINE btVector3
operator-(const btVector3& v1, const btVector3& v2)
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))

	// without _mm_and_ps this code causes slowdown in Concave moving
	// (the mask also keeps the unused w lane at zero)
	__m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
	return btVector3(_mm_and_ps(r, btvFFF0fMask));
#elif defined(BT_USE_NEON)
	float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128);
	return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
#else
	return btVector3(
			v1.m_floats[0] - v2.m_floats[0],
			v1.m_floats[1] - v2.m_floats[1],
			v1.m_floats[2] - v2.m_floats[2]);
#endif
}
815
/**@brief Return the negative of the vector */
SIMD_FORCE_INLINE btVector3
operator-(const btVector3& v)
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
	// XOR with the sign-bit mask flips the sign of every lane; the FFF0 mask re-zeroes w.
	__m128 r = _mm_xor_ps(v.mVec128, btvMzeroMask);
	return btVector3(_mm_and_ps(r, btvFFF0fMask));
#elif defined(BT_USE_NEON)
	return btVector3((btSimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)btvMzeroMask));
#else
	return btVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]);
#endif
}
829
/**@brief Return the vector scaled by s */
SIMD_FORCE_INLINE btVector3
operator*(const btVector3& v, const btScalar& s)
{
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
	__m128 vs = _mm_load_ss(&s);	// (S 0 0 0)
	vs = bt_pshufd_ps(vs, 0x80);	// (S S S 0.0) -- w lane kept at zero
	return btVector3(_mm_mul_ps(v.mVec128, vs));
#elif defined(BT_USE_NEON)
	float32x4_t r = vmulq_n_f32(v.mVec128, s);
	return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
#else
	return btVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s);
#endif
}
845
846 /**@brief Return the vector scaled by s */
847 SIMD_FORCE_INLINE btVector3
848 operator*(const btScalar& s, const btVector3& v)
849 {
850 return v * s;
851 }
852
/**@brief Return the vector inversely scaled by s
 * @param s Divisor (must be non-zero) */
SIMD_FORCE_INLINE btVector3
operator/(const btVector3& v, const btScalar& s)
{
	btFullAssert(s != btScalar(0.0));
#if 0 //defined(BT_USE_SSE_IN_API)
// this code is not faster !
	__m128 vs = _mm_load_ss(&s);
	vs = _mm_div_ss(v1110, vs);
	vs = bt_pshufd_ps(vs, 0x00);	// (S S S S)

	return btVector3(_mm_mul_ps(v.mVec128, vs));
#else
	// Multiply by the reciprocal; one divide instead of three.
	return v * (btScalar(1.0) / s);
#endif
}
869
/**@brief Return the elementwise quotient of two vectors */
SIMD_FORCE_INLINE btVector3
operator/(const btVector3& v1, const btVector3& v2)
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE))
	__m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
	vec = _mm_and_ps(vec, btvFFF0fMask);	// zero the w lane (avoids 0/0 NaN in w)
	return btVector3(vec);
#elif defined(BT_USE_NEON)
	// NEON has no divide instruction: use a reciprocal estimate plus
	// two Newton-Raphson refinement steps (vrecpsq computes 2 - v*y).
	float32x4_t x, y, v, m;

	x = v1.mVec128;
	y = v2.mVec128;

	v = vrecpeq_f32(y);			// v ~ 1/y
	m = vrecpsq_f32(y, v);		// m = (2-v*y)
	v = vmulq_f32(v, m);		// vv = v*m ~~ 1/y
	m = vrecpsq_f32(y, v);		// mm = (2-vv*y)
	v = vmulq_f32(v, x);		// x*vv
	v = vmulq_f32(v, m);		// (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y

	// NOTE(review): unlike the SSE path, the w lane is not masked here, so it may
	// hold a non-finite value (w lanes are typically 0, giving 0 * 1/0) — confirm
	// callers only consume x,y,z.
	return btVector3(v);
#else
	return btVector3(
			v1.m_floats[0] / v2.m_floats[0],
			v1.m_floats[1] / v2.m_floats[1],
			v1.m_floats[2] / v2.m_floats[2]);
#endif
}
899
900 /**@brief Return the dot product between two vectors */
901 SIMD_FORCE_INLINE btScalar
btDot(const btVector3 & v1,const btVector3 & v2)902 btDot(const btVector3& v1, const btVector3& v2)
903 {
904 return v1.dot(v2);
905 }
906
907
908 /**@brief Return the distance squared between two vectors */
909 SIMD_FORCE_INLINE btScalar
btDistance2(const btVector3 & v1,const btVector3 & v2)910 btDistance2(const btVector3& v1, const btVector3& v2)
911 {
912 return v1.distance2(v2);
913 }
914
915
916 /**@brief Return the distance between two vectors */
917 SIMD_FORCE_INLINE btScalar
btDistance(const btVector3 & v1,const btVector3 & v2)918 btDistance(const btVector3& v1, const btVector3& v2)
919 {
920 return v1.distance(v2);
921 }
922
923 /**@brief Return the angle between two vectors */
924 SIMD_FORCE_INLINE btScalar
btAngle(const btVector3 & v1,const btVector3 & v2)925 btAngle(const btVector3& v1, const btVector3& v2)
926 {
927 return v1.angle(v2);
928 }
929
930 /**@brief Return the cross product of two vectors */
931 SIMD_FORCE_INLINE btVector3
btCross(const btVector3 & v1,const btVector3 & v2)932 btCross(const btVector3& v1, const btVector3& v2)
933 {
934 return v1.cross(v2);
935 }
936
937 SIMD_FORCE_INLINE btScalar
btTriple(const btVector3 & v1,const btVector3 & v2,const btVector3 & v3)938 btTriple(const btVector3& v1, const btVector3& v2, const btVector3& v3)
939 {
940 return v1.triple(v2, v3);
941 }
942
943 /**@brief Return the linear interpolation between two vectors
944 * @param v1 One vector
945 * @param v2 The other vector
946 * @param t The ration of this to v (t = 0 => return v1, t=1 => return v2) */
947 SIMD_FORCE_INLINE btVector3
lerp(const btVector3 & v1,const btVector3 & v2,const btScalar & t)948 lerp(const btVector3& v1, const btVector3& v2, const btScalar& t)
949 {
950 return v1.lerp(v2, t);
951 }
952
953
954
distance2(const btVector3 & v)955 SIMD_FORCE_INLINE btScalar btVector3::distance2(const btVector3& v) const
956 {
957 return (v - *this).length2();
958 }
959
distance(const btVector3 & v)960 SIMD_FORCE_INLINE btScalar btVector3::distance(const btVector3& v) const
961 {
962 return (v - *this).length();
963 }
964
normalized()965 SIMD_FORCE_INLINE btVector3 btVector3::normalized() const
966 {
967 btVector3 nrm = *this;
968
969 return nrm.normalize();
970 }
971
/**@brief Return this vector rotated about wAxis by _angle (Rodrigues' rotation).
 * @param wAxis  Rotation axis; must be a unit length vector
 * @param _angle Rotation angle in radians */
SIMD_FORCE_INLINE btVector3 btVector3::rotate( const btVector3& wAxis, const btScalar _angle ) const
{
	// wAxis must be a unit length vector

#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)

	__m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
	btScalar ssin = btSin( _angle );
	__m128 C = wAxis.cross( mVec128 ).mVec128;
	O = _mm_and_ps(O, btvFFF0fMask);
	btScalar scos = btCos( _angle );

	__m128 vsin = _mm_load_ss(&ssin);	// (S 0 0 0)
	__m128 vcos = _mm_load_ss(&scos);	// (S 0 0 0)

	// Horizontal add of O's lanes via two rotations, building dot(wAxis, *this)
	__m128 Y = bt_pshufd_ps(O, 0xC9);	// (Y Z X 0)
	__m128 Z = bt_pshufd_ps(O, 0xD2);	// (Z X Y 0)
	O = _mm_add_ps(O, Y);
	vsin = bt_pshufd_ps(vsin, 0x80);	// (S S S 0)
	O = _mm_add_ps(O, Z);
	vcos = bt_pshufd_ps(vcos, 0x80);	// (S S S 0)

	vsin = vsin * C;
	O = O * wAxis.mVec128;				// projection of *this onto the axis
	__m128 X = mVec128 - O;				// component perpendicular to the axis

	O = O + vsin;
	vcos = vcos * X;
	O = O + vcos;

	return btVector3(O);
#else
	// o: projection onto the axis — invariant under the rotation
	btVector3 o = wAxis * wAxis.dot( *this );
	// _x: perpendicular component, rotated within the plane normal to wAxis
	btVector3 _x = *this - o;
	btVector3 _y;

	_y = wAxis.cross( *this );

	return ( o + _x * btCos( _angle ) + _y * btSin( _angle ) );
#endif
}
1013
/**@brief Return the index of the array element with the greatest dot product
 * against *this, or -1 if array_count is 0 (or negative).
 * @param array       Array of vectors to test
 * @param array_count Number of elements in array
 * @param dotOut      Receives the maximum dot product found */
SIMD_FORCE_INLINE long btVector3::maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
{
#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#ifdef BT_USE_SSE // Urho3D - to be consistent with the function return below
	// Below this element count the scalar loop beats the SIMD kernel's setup cost
	const long scalar_cutoff = 10;
	long _maxdot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
#elif defined BT_USE_NEON
	const long scalar_cutoff = 4;
	extern long (*_maxdot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
#endif
	if( array_count < scalar_cutoff )
#endif
	{
		// Scalar fallback: linear scan tracking the best dot product so far
		btScalar maxDot1 = -SIMD_INFINITY;
		int i = 0;
		int ptIndex = -1;
		for( i = 0; i < array_count; i++ )
		{
			btScalar dot = array[i].dot(*this);

			if( dot > maxDot1 )
			{
				maxDot1 = dot;
				ptIndex = i;
			}
		}

		dotOut = maxDot1;
		return ptIndex;
	}
#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
	// Large arrays: hand off to the platform-specific SIMD kernel
	return _maxdot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
#endif
}
1048
/**@brief Return the index of the array element with the smallest dot product
 * against *this, or -1 if array_count is 0 (or negative).
 * @param array       Array of vectors to test
 * @param array_count Number of elements in array
 * @param dotOut      Receives the minimum dot product found */
SIMD_FORCE_INLINE long btVector3::minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
{
#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined BT_USE_SSE
	// Below this element count the scalar loop beats the SIMD kernel's setup cost
	const long scalar_cutoff = 10;
	long _mindot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
#elif defined BT_USE_NEON
	const long scalar_cutoff = 4;
	extern long (*_mindot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
#else
#error unhandled arch!
#endif

	if( array_count < scalar_cutoff )
#endif
	{
		// Scalar fallback: linear scan tracking the smallest dot product so far
		btScalar minDot = SIMD_INFINITY;
		int i = 0;
		int ptIndex = -1;

		for( i = 0; i < array_count; i++ )
		{
			btScalar dot = array[i].dot(*this);

			if( dot < minDot )
			{
				minDot = dot;
				ptIndex = i;
			}
		}

		dotOut = minDot;

		return ptIndex;
	}
#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
	// Large arrays: hand off to the platform-specific SIMD kernel
	return _mindot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
#endif//BT_USE_SIMD_VECTOR3
}
1088
1089
///btVector4 extends btVector3 with an accessible fourth (w) component.
///Storage layout is identical (btVector3 already holds 4 scalars); only the
///interface differs, so btVector4 adds no size or alignment cost.
class btVector4 : public btVector3
{
public:

	///No-initialization constructor: components are left uninitialized
	SIMD_FORCE_INLINE btVector4() {}

	///Construct from four scalar components
	SIMD_FORCE_INLINE btVector4(const btScalar& _x, const btScalar& _y, const btScalar& _z,const btScalar& _w)
		: btVector3(_x,_y,_z)
	{
		m_floats[3] = _w;
	}

#if (defined (BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined (BT_USE_NEON)
	///Construct directly from a SIMD register
	SIMD_FORCE_INLINE btVector4(const btSimdFloat4 vec)
	{
		mVec128 = vec;
	}

	///Construct from a btVector3; the whole 128-bit register is copied,
	///so w takes whatever value the source's fourth lane held
	SIMD_FORCE_INLINE btVector4(const btVector3& rhs)
	{
		mVec128 = rhs.mVec128;
	}

	///Assignment via a single SIMD register copy
	SIMD_FORCE_INLINE btVector4&
	operator=(const btVector4& v)
	{
		mVec128 = v.mVec128;
		return *this;
	}
#endif // #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)

	/**@brief Return a btVector4 with the absolute value of all 4 components */
	SIMD_FORCE_INLINE btVector4 absolute4() const
	{
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		// Clear the sign bit of all four lanes with one mask
		return btVector4(_mm_and_ps(mVec128, btvAbsfMask));
#elif defined(BT_USE_NEON)
		return btVector4(vabsq_f32(mVec128));
#else
		return btVector4(
			btFabs(m_floats[0]),
			btFabs(m_floats[1]),
			btFabs(m_floats[2]),
			btFabs(m_floats[3]));
#endif
	}

	/**@brief Return the w component */
	btScalar	getW() const { return m_floats[3];}

	/**@brief Return the index (0-3) of the component with the largest value,
	 * or -1 if every component is <= -BT_LARGE_FLOAT */
	SIMD_FORCE_INLINE int maxAxis4() const
	{
		int maxIndex = -1;
		btScalar maxVal = btScalar(-BT_LARGE_FLOAT);
		if (m_floats[0] > maxVal)
		{
			maxIndex = 0;
			maxVal = m_floats[0];
		}
		if (m_floats[1] > maxVal)
		{
			maxIndex = 1;
			maxVal = m_floats[1];
		}
		if (m_floats[2] > maxVal)
		{
			maxIndex = 2;
			maxVal =m_floats[2];
		}
		if (m_floats[3] > maxVal)
		{
			maxIndex = 3;
			maxVal = m_floats[3];
		}

		return maxIndex;
	}

	/**@brief Return the index (0-3) of the component with the smallest value,
	 * or -1 if every component is >= BT_LARGE_FLOAT */
	SIMD_FORCE_INLINE int minAxis4() const
	{
		int minIndex = -1;
		btScalar minVal = btScalar(BT_LARGE_FLOAT);
		if (m_floats[0] < minVal)
		{
			minIndex = 0;
			minVal = m_floats[0];
		}
		if (m_floats[1] < minVal)
		{
			minIndex = 1;
			minVal = m_floats[1];
		}
		if (m_floats[2] < minVal)
		{
			minIndex = 2;
			minVal =m_floats[2];
		}
		if (m_floats[3] < minVal)
		{
			minIndex = 3;
			minVal = m_floats[3];
		}

		return minIndex;
	}

	/**@brief Return the index of the component with the largest magnitude */
	SIMD_FORCE_INLINE int closestAxis4() const
	{
		return absolute4().maxAxis4();
	}




	/**@brief Set x,y,z and zero w
	 * @param x Value of x
	 * @param y Value of y
	 * @param z Value of z
	 */


/*		void getValue(btScalar *m) const
		{
			m[0] = m_floats[0];
			m[1] = m_floats[1];
			m[2] =m_floats[2];
		}
*/
	/**@brief Set the values
	 * @param x Value of x
	 * @param y Value of y
	 * @param z Value of z
	 * @param w Value of w
	 */
	SIMD_FORCE_INLINE void	setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z,const btScalar& _w)
	{
		m_floats[0]=_x;
		m_floats[1]=_y;
		m_floats[2]=_z;
		m_floats[3]=_w;
	}


};
1237
1238
1239 ///btSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
btSwapScalarEndian(const btScalar & sourceVal,btScalar & destVal)1240 SIMD_FORCE_INLINE void btSwapScalarEndian(const btScalar& sourceVal, btScalar& destVal)
1241 {
1242 #ifdef BT_USE_DOUBLE_PRECISION
1243 unsigned char* dest = (unsigned char*) &destVal;
1244 unsigned char* src = (unsigned char*) &sourceVal;
1245 dest[0] = src[7];
1246 dest[1] = src[6];
1247 dest[2] = src[5];
1248 dest[3] = src[4];
1249 dest[4] = src[3];
1250 dest[5] = src[2];
1251 dest[6] = src[1];
1252 dest[7] = src[0];
1253 #else
1254 unsigned char* dest = (unsigned char*) &destVal;
1255 unsigned char* src = (unsigned char*) &sourceVal;
1256 dest[0] = src[3];
1257 dest[1] = src[2];
1258 dest[2] = src[1];
1259 dest[3] = src[0];
1260 #endif //BT_USE_DOUBLE_PRECISION
1261 }
1262 ///btSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
btSwapVector3Endian(const btVector3 & sourceVec,btVector3 & destVec)1263 SIMD_FORCE_INLINE void btSwapVector3Endian(const btVector3& sourceVec, btVector3& destVec)
1264 {
1265 for (int i=0;i<4;i++)
1266 {
1267 btSwapScalarEndian(sourceVec[i],destVec[i]);
1268 }
1269
1270 }
1271
1272 ///btUnSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
btUnSwapVector3Endian(btVector3 & vector)1273 SIMD_FORCE_INLINE void btUnSwapVector3Endian(btVector3& vector)
1274 {
1275
1276 btVector3 swappedVec;
1277 for (int i=0;i<4;i++)
1278 {
1279 btSwapScalarEndian(vector[i],swappedVec[i]);
1280 }
1281 vector = swappedVec;
1282 }
1283
1284 template <class T>
btPlaneSpace1(const T & n,T & p,T & q)1285 SIMD_FORCE_INLINE void btPlaneSpace1 (const T& n, T& p, T& q)
1286 {
1287 if (btFabs(n[2]) > SIMDSQRT12) {
1288 // choose p in y-z plane
1289 btScalar a = n[1]*n[1] + n[2]*n[2];
1290 btScalar k = btRecipSqrt (a);
1291 p[0] = 0;
1292 p[1] = -n[2]*k;
1293 p[2] = n[1]*k;
1294 // set q = n x p
1295 q[0] = a*k;
1296 q[1] = -n[0]*p[2];
1297 q[2] = n[0]*p[1];
1298 }
1299 else {
1300 // choose p in x-y plane
1301 btScalar a = n[0]*n[0] + n[1]*n[1];
1302 btScalar k = btRecipSqrt (a);
1303 p[0] = -n[1]*k;
1304 p[1] = n[0]*k;
1305 p[2] = 0;
1306 // set q = n x p
1307 q[0] = -n[2]*p[1];
1308 q[1] = n[2]*p[0];
1309 q[2] = a*k;
1310 }
1311 }
1312
1313
///Serialization payload: btVector3 stored as 4 single-precision floats (x, y, z, w)
struct btVector3FloatData
{
	float m_floats[4];
};
1318
///Serialization payload: btVector3 stored as 4 double-precision values (x, y, z, w)
struct btVector3DoubleData
{
	double m_floats[4];

};
1324
serializeFloat(struct btVector3FloatData & dataOut)1325 SIMD_FORCE_INLINE void btVector3::serializeFloat(struct btVector3FloatData& dataOut) const
1326 {
1327 ///could also do a memcpy, check if it is worth it
1328 for (int i=0;i<4;i++)
1329 dataOut.m_floats[i] = float(m_floats[i]);
1330 }
1331
deSerializeFloat(const struct btVector3FloatData & dataIn)1332 SIMD_FORCE_INLINE void btVector3::deSerializeFloat(const struct btVector3FloatData& dataIn)
1333 {
1334 for (int i=0;i<4;i++)
1335 m_floats[i] = btScalar(dataIn.m_floats[i]);
1336 }
1337
1338
serializeDouble(struct btVector3DoubleData & dataOut)1339 SIMD_FORCE_INLINE void btVector3::serializeDouble(struct btVector3DoubleData& dataOut) const
1340 {
1341 ///could also do a memcpy, check if it is worth it
1342 for (int i=0;i<4;i++)
1343 dataOut.m_floats[i] = double(m_floats[i]);
1344 }
1345
deSerializeDouble(const struct btVector3DoubleData & dataIn)1346 SIMD_FORCE_INLINE void btVector3::deSerializeDouble(const struct btVector3DoubleData& dataIn)
1347 {
1348 for (int i=0;i<4;i++)
1349 m_floats[i] = btScalar(dataIn.m_floats[i]);
1350 }
1351
1352
serialize(struct btVector3Data & dataOut)1353 SIMD_FORCE_INLINE void btVector3::serialize(struct btVector3Data& dataOut) const
1354 {
1355 ///could also do a memcpy, check if it is worth it
1356 for (int i=0;i<4;i++)
1357 dataOut.m_floats[i] = m_floats[i];
1358 }
1359
deSerialize(const struct btVector3Data & dataIn)1360 SIMD_FORCE_INLINE void btVector3::deSerialize(const struct btVector3Data& dataIn)
1361 {
1362 for (int i=0;i<4;i++)
1363 m_floats[i] = dataIn.m_floats[i];
1364 }
1365
1366 #endif //BT_VECTOR3_H
1367