/*
Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
14
15 #ifndef B3_VECTOR3_H
16 #define B3_VECTOR3_H
17
18 //#include <stdint.h>
19 #include "b3Scalar.h"
20 #include "b3MinMax.h"
21 #include "b3AlignedAllocator.h"
22
23 #ifdef B3_USE_DOUBLE_PRECISION
24 #define b3Vector3Data b3Vector3DoubleData
25 #define b3Vector3DataName "b3Vector3DoubleData"
26 #else
27 #define b3Vector3Data b3Vector3FloatData
28 #define b3Vector3DataName "b3Vector3FloatData"
29 #endif //B3_USE_DOUBLE_PRECISION
30
#if defined B3_USE_SSE

//typedef  uint32_t __m128i __attribute__ ((vector_size(16)));

#ifdef _MSC_VER
#pragma warning(disable : 4556)  // value of intrinsic immediate argument '4294967239' is out of range '0 - 255'
#endif

// Packs four 2-bit lane selectors into one 8-bit shuffle immediate
// (x in bits 0-1, y in bits 2-3, z in bits 4-5, w in bits 6-7); the result is masked to 8 bits.
#define B3_SHUFFLE(x, y, z, w) (((w) << 6 | (z) << 4 | (y) << 2 | (x)) & 0xff)
//#define b3_pshufd_ps( _a, _mask ) (__m128) _mm_shuffle_epi32((__m128i)(_a), (_mask) )
// Shuffles a register with itself using the given immediate.
#define b3_pshufd_ps(_a, _mask) _mm_shuffle_ps((_a), (_a), (_mask))
// Copies lane _i into lanes 0..2 and keeps lane 3 in place.
#define b3_splat3_ps(_a, _i) b3_pshufd_ps((_a), B3_SHUFFLE(_i, _i, _i, 3))
// Copies lane _i into all four lanes.
#define b3_splat_ps(_a, _i) b3_pshufd_ps((_a), B3_SHUFFLE(_i, _i, _i, _i))

// Integer bit masks (arguments of _mm_set_epi32 are listed from lane 3 down to lane 0).
// b3v3AbsiMask: clears the sign bit of lanes 0..2 and every bit of lane 3.
#define b3v3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
// b3vAbsMask: clears the sign bit of all four lanes.
#define b3vAbsMask (_mm_set_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
// b3vFFF0Mask: keeps lanes 0..2 and clears lane 3.
#define b3vFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
// The same masks reinterpreted as float registers.
#define b3v3AbsfMask b3CastiTo128f(b3v3AbsiMask)
#define b3vFFF0fMask b3CastiTo128f(b3vFFF0Mask)
#define b3vxyzMaskf b3vFFF0fMask
#define b3vAbsfMask b3CastiTo128f(b3vAbsMask)

// Constant registers used by the SSE code paths below.
const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
const __m128 B3_ATTRIBUTE_ALIGNED16(b3vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};

#endif
59
#ifdef B3_USE_NEON

// Constant registers used by the NEON code paths below.
// b3vMzeroMask: -0.0f in every lane (only the sign bits are set).
const float32x4_t B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
// b3vFFF0Mask: keeps lanes 0..2 and clears lane 3.
const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3vFFF0Mask) = (int32x4_t){0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0};
// b3vAbsMask: clears the sign bit of all four lanes.
const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3vAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
// b3v3AbsMask: clears the sign bit of lanes 0..2 and every bit of lane 3.
const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3v3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};

#endif
68
69 class b3Vector3;
70 class b3Vector4;
71
72 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
73 //#if defined (B3_USE_SSE) || defined (B3_USE_NEON)
74 inline b3Vector3 b3MakeVector3(b3SimdFloat4 v);
75 inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec);
76 #endif
77
78 inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z);
79 inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w);
80 inline b3Vector4 b3MakeVector4(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w);
81
82 /**@brief b3Vector3 can be used to represent 3D points and vectors.
83 * It has an un-used w component to suit 16-byte alignment when b3Vector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by user
84 * Ideally, this class should be replaced by a platform optimized SIMD version that keeps the data in registers
85 */
B3_ATTRIBUTE_ALIGNED16(class)86 B3_ATTRIBUTE_ALIGNED16(class)
87 b3Vector3
88 {
89 public:
90 #if defined(B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM
91 union {
92 b3SimdFloat4 mVec128;
93 float m_floats[4];
94 struct
95 {
96 float x, y, z, w;
97 };
98 };
99 #else
100 union {
101 float m_floats[4];
102 struct
103 {
104 float x, y, z, w;
105 };
106 };
107 #endif
108
109 public:
110 B3_DECLARE_ALIGNED_ALLOCATOR();
111
112 #if defined(B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM
113
114 /*B3_FORCE_INLINE b3Vector3()
115 {
116 }
117 */
118
119 B3_FORCE_INLINE b3SimdFloat4 get128() const
120 {
121 return mVec128;
122 }
123 B3_FORCE_INLINE void set128(b3SimdFloat4 v128)
124 {
125 mVec128 = v128;
126 }
127 #endif
128
129 public:
130 /**@brief Add a vector to this one
131 * @param The vector to add to this one */
132 B3_FORCE_INLINE b3Vector3& operator+=(const b3Vector3& v)
133 {
134 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
135 mVec128 = _mm_add_ps(mVec128, v.mVec128);
136 #elif defined(B3_USE_NEON)
137 mVec128 = vaddq_f32(mVec128, v.mVec128);
138 #else
139 m_floats[0] += v.m_floats[0];
140 m_floats[1] += v.m_floats[1];
141 m_floats[2] += v.m_floats[2];
142 #endif
143 return *this;
144 }
145
146 /**@brief Subtract a vector from this one
147 * @param The vector to subtract */
148 B3_FORCE_INLINE b3Vector3& operator-=(const b3Vector3& v)
149 {
150 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
151 mVec128 = _mm_sub_ps(mVec128, v.mVec128);
152 #elif defined(B3_USE_NEON)
153 mVec128 = vsubq_f32(mVec128, v.mVec128);
154 #else
155 m_floats[0] -= v.m_floats[0];
156 m_floats[1] -= v.m_floats[1];
157 m_floats[2] -= v.m_floats[2];
158 #endif
159 return *this;
160 }
161
162 /**@brief Scale the vector
163 * @param s Scale factor */
164 B3_FORCE_INLINE b3Vector3& operator*=(const b3Scalar& s)
165 {
166 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
167 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
168 vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0)
169 mVec128 = _mm_mul_ps(mVec128, vs);
170 #elif defined(B3_USE_NEON)
171 mVec128 = vmulq_n_f32(mVec128, s);
172 #else
173 m_floats[0] *= s;
174 m_floats[1] *= s;
175 m_floats[2] *= s;
176 #endif
177 return *this;
178 }
179
180 /**@brief Inversely scale the vector
181 * @param s Scale factor to divide by */
182 B3_FORCE_INLINE b3Vector3& operator/=(const b3Scalar& s)
183 {
184 b3FullAssert(s != b3Scalar(0.0));
185
186 #if 0 //defined(B3_USE_SSE_IN_API)
187 // this code is not faster !
188 __m128 vs = _mm_load_ss(&s);
189 vs = _mm_div_ss(b3v1110, vs);
190 vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
191
192 mVec128 = _mm_mul_ps(mVec128, vs);
193
194 return *this;
195 #else
196 return *this *= b3Scalar(1.0) / s;
197 #endif
198 }
199
200 /**@brief Return the dot product
201 * @param v The other vector in the dot product */
202 B3_FORCE_INLINE b3Scalar dot(const b3Vector3& v) const
203 {
204 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
205 __m128 vd = _mm_mul_ps(mVec128, v.mVec128);
206 __m128 z = _mm_movehl_ps(vd, vd);
207 __m128 y = _mm_shuffle_ps(vd, vd, 0x55);
208 vd = _mm_add_ss(vd, y);
209 vd = _mm_add_ss(vd, z);
210 return _mm_cvtss_f32(vd);
211 #elif defined(B3_USE_NEON)
212 float32x4_t vd = vmulq_f32(mVec128, v.mVec128);
213 float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd));
214 x = vadd_f32(x, vget_high_f32(vd));
215 return vget_lane_f32(x, 0);
216 #else
217 return m_floats[0] * v.m_floats[0] +
218 m_floats[1] * v.m_floats[1] +
219 m_floats[2] * v.m_floats[2];
220 #endif
221 }
222
223 /**@brief Return the length of the vector squared */
224 B3_FORCE_INLINE b3Scalar length2() const
225 {
226 return dot(*this);
227 }
228
229 /**@brief Return the length of the vector */
230 B3_FORCE_INLINE b3Scalar length() const
231 {
232 return b3Sqrt(length2());
233 }
234
235 /**@brief Return the distance squared between the ends of this and another vector
236 * This is symantically treating the vector like a point */
237 B3_FORCE_INLINE b3Scalar distance2(const b3Vector3& v) const;
238
239 /**@brief Return the distance between the ends of this and another vector
240 * This is symantically treating the vector like a point */
241 B3_FORCE_INLINE b3Scalar distance(const b3Vector3& v) const;
242
243 B3_FORCE_INLINE b3Vector3& safeNormalize()
244 {
245 b3Scalar l2 = length2();
246 //triNormal.normalize();
247 if (l2 >= B3_EPSILON * B3_EPSILON)
248 {
249 (*this) /= b3Sqrt(l2);
250 }
251 else
252 {
253 setValue(1, 0, 0);
254 }
255 return *this;
256 }
257
258 /**@brief Normalize this vector
259 * x^2 + y^2 + z^2 = 1 */
260 B3_FORCE_INLINE b3Vector3& normalize()
261 {
262 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
263 // dot product first
264 __m128 vd = _mm_mul_ps(mVec128, mVec128);
265 __m128 z = _mm_movehl_ps(vd, vd);
266 __m128 y = _mm_shuffle_ps(vd, vd, 0x55);
267 vd = _mm_add_ss(vd, y);
268 vd = _mm_add_ss(vd, z);
269
270 #if 0
271 vd = _mm_sqrt_ss(vd);
272 vd = _mm_div_ss(b3v1110, vd);
273 vd = b3_splat_ps(vd, 0x80);
274 mVec128 = _mm_mul_ps(mVec128, vd);
275 #else
276
277 // NR step 1/sqrt(x) - vd is x, y is output
278 y = _mm_rsqrt_ss(vd); // estimate
279
280 // one step NR
281 z = b3v1_5;
282 vd = _mm_mul_ss(vd, b3vHalf); // vd * 0.5
283 //x2 = vd;
284 vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0
285 vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0
286 z = _mm_sub_ss(z, vd); // 1.5 - vd * 0.5 * y0 * y0
287
288 y = _mm_mul_ss(y, z); // y0 * (1.5 - vd * 0.5 * y0 * y0)
289
290 y = b3_splat_ps(y, 0x80);
291 mVec128 = _mm_mul_ps(mVec128, y);
292
293 #endif
294
295 return *this;
296 #else
297 return *this /= length();
298 #endif
299 }
300
301 /**@brief Return a normalized version of this vector */
302 B3_FORCE_INLINE b3Vector3 normalized() const;
303
304 /**@brief Return a rotated version of this vector
305 * @param wAxis The axis to rotate about
306 * @param angle The angle to rotate by */
307 B3_FORCE_INLINE b3Vector3 rotate(const b3Vector3& wAxis, const b3Scalar angle) const;
308
309 /**@brief Return the angle between this and another vector
310 * @param v The other vector */
311 B3_FORCE_INLINE b3Scalar angle(const b3Vector3& v) const
312 {
313 b3Scalar s = b3Sqrt(length2() * v.length2());
314 b3FullAssert(s != b3Scalar(0.0));
315 return b3Acos(dot(v) / s);
316 }
317
318 /**@brief Return a vector will the absolute values of each element */
319 B3_FORCE_INLINE b3Vector3 absolute() const
320 {
321 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
322 return b3MakeVector3(_mm_and_ps(mVec128, b3v3AbsfMask));
323 #elif defined(B3_USE_NEON)
324 return b3Vector3(vabsq_f32(mVec128));
325 #else
326 return b3MakeVector3(
327 b3Fabs(m_floats[0]),
328 b3Fabs(m_floats[1]),
329 b3Fabs(m_floats[2]));
330 #endif
331 }
332
333 /**@brief Return the cross product between this and another vector
334 * @param v The other vector */
335 B3_FORCE_INLINE b3Vector3 cross(const b3Vector3& v) const
336 {
337 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
338 __m128 T, V;
339
340 T = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
341 V = b3_pshufd_ps(v.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
342
343 V = _mm_mul_ps(V, mVec128);
344 T = _mm_mul_ps(T, v.mVec128);
345 V = _mm_sub_ps(V, T);
346
347 V = b3_pshufd_ps(V, B3_SHUFFLE(1, 2, 0, 3));
348 return b3MakeVector3(V);
349 #elif defined(B3_USE_NEON)
350 float32x4_t T, V;
351 // form (Y, Z, X, _) of mVec128 and v.mVec128
352 float32x2_t Tlow = vget_low_f32(mVec128);
353 float32x2_t Vlow = vget_low_f32(v.mVec128);
354 T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow);
355 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow);
356
357 V = vmulq_f32(V, mVec128);
358 T = vmulq_f32(T, v.mVec128);
359 V = vsubq_f32(V, T);
360 Vlow = vget_low_f32(V);
361 // form (Y, Z, X, _);
362 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
363 V = (float32x4_t)vandq_s32((int32x4_t)V, b3vFFF0Mask);
364
365 return b3Vector3(V);
366 #else
367 return b3MakeVector3(
368 m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1],
369 m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2],
370 m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]);
371 #endif
372 }
373
374 B3_FORCE_INLINE b3Scalar triple(const b3Vector3& v1, const b3Vector3& v2) const
375 {
376 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
377 // cross:
378 __m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
379 __m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
380
381 V = _mm_mul_ps(V, v1.mVec128);
382 T = _mm_mul_ps(T, v2.mVec128);
383 V = _mm_sub_ps(V, T);
384
385 V = _mm_shuffle_ps(V, V, B3_SHUFFLE(1, 2, 0, 3));
386
387 // dot:
388 V = _mm_mul_ps(V, mVec128);
389 __m128 z = _mm_movehl_ps(V, V);
390 __m128 y = _mm_shuffle_ps(V, V, 0x55);
391 V = _mm_add_ss(V, y);
392 V = _mm_add_ss(V, z);
393 return _mm_cvtss_f32(V);
394
395 #elif defined(B3_USE_NEON)
396 // cross:
397 float32x4_t T, V;
398 // form (Y, Z, X, _) of mVec128 and v.mVec128
399 float32x2_t Tlow = vget_low_f32(v1.mVec128);
400 float32x2_t Vlow = vget_low_f32(v2.mVec128);
401 T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow);
402 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow);
403
404 V = vmulq_f32(V, v1.mVec128);
405 T = vmulq_f32(T, v2.mVec128);
406 V = vsubq_f32(V, T);
407 Vlow = vget_low_f32(V);
408 // form (Y, Z, X, _);
409 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
410
411 // dot:
412 V = vmulq_f32(mVec128, V);
413 float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V));
414 x = vadd_f32(x, vget_high_f32(V));
415 return vget_lane_f32(x, 0);
416 #else
417 return m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) +
418 m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) +
419 m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
420 #endif
421 }
422
423 /**@brief Return the axis with the smallest value
424 * Note return values are 0,1,2 for x, y, or z */
425 B3_FORCE_INLINE int minAxis() const
426 {
427 return m_floats[0] < m_floats[1] ? (m_floats[0] < m_floats[2] ? 0 : 2) : (m_floats[1] < m_floats[2] ? 1 : 2);
428 }
429
430 /**@brief Return the axis with the largest value
431 * Note return values are 0,1,2 for x, y, or z */
432 B3_FORCE_INLINE int maxAxis() const
433 {
434 return m_floats[0] < m_floats[1] ? (m_floats[1] < m_floats[2] ? 2 : 1) : (m_floats[0] < m_floats[2] ? 2 : 0);
435 }
436
437 B3_FORCE_INLINE int furthestAxis() const
438 {
439 return absolute().minAxis();
440 }
441
442 B3_FORCE_INLINE int closestAxis() const
443 {
444 return absolute().maxAxis();
445 }
446
447 B3_FORCE_INLINE void setInterpolate3(const b3Vector3& v0, const b3Vector3& v1, b3Scalar rt)
448 {
449 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
450 __m128 vrt = _mm_load_ss(&rt); // (rt 0 0 0)
451 b3Scalar s = b3Scalar(1.0) - rt;
452 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
453 vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0)
454 __m128 r0 = _mm_mul_ps(v0.mVec128, vs);
455 vrt = b3_pshufd_ps(vrt, 0x80); // (rt rt rt 0.0)
456 __m128 r1 = _mm_mul_ps(v1.mVec128, vrt);
457 __m128 tmp3 = _mm_add_ps(r0, r1);
458 mVec128 = tmp3;
459 #elif defined(B3_USE_NEON)
460 float32x4_t vl = vsubq_f32(v1.mVec128, v0.mVec128);
461 vl = vmulq_n_f32(vl, rt);
462 mVec128 = vaddq_f32(vl, v0.mVec128);
463 #else
464 b3Scalar s = b3Scalar(1.0) - rt;
465 m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0];
466 m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1];
467 m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2];
468 //don't do the unused w component
469 // m_co[3] = s * v0[3] + rt * v1[3];
470 #endif
471 }
472
473 /**@brief Return the linear interpolation between this and another vector
474 * @param v The other vector
475 * @param t The ration of this to v (t = 0 => return this, t=1 => return other) */
476 B3_FORCE_INLINE b3Vector3 lerp(const b3Vector3& v, const b3Scalar& t) const
477 {
478 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
479 __m128 vt = _mm_load_ss(&t); // (t 0 0 0)
480 vt = b3_pshufd_ps(vt, 0x80); // (rt rt rt 0.0)
481 __m128 vl = _mm_sub_ps(v.mVec128, mVec128);
482 vl = _mm_mul_ps(vl, vt);
483 vl = _mm_add_ps(vl, mVec128);
484
485 return b3MakeVector3(vl);
486 #elif defined(B3_USE_NEON)
487 float32x4_t vl = vsubq_f32(v.mVec128, mVec128);
488 vl = vmulq_n_f32(vl, t);
489 vl = vaddq_f32(vl, mVec128);
490
491 return b3Vector3(vl);
492 #else
493 return b3MakeVector3(m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
494 m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
495 m_floats[2] + (v.m_floats[2] - m_floats[2]) * t);
496 #endif
497 }
498
499 /**@brief Elementwise multiply this vector by the other
500 * @param v The other vector */
501 B3_FORCE_INLINE b3Vector3& operator*=(const b3Vector3& v)
502 {
503 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
504 mVec128 = _mm_mul_ps(mVec128, v.mVec128);
505 #elif defined(B3_USE_NEON)
506 mVec128 = vmulq_f32(mVec128, v.mVec128);
507 #else
508 m_floats[0] *= v.m_floats[0];
509 m_floats[1] *= v.m_floats[1];
510 m_floats[2] *= v.m_floats[2];
511 #endif
512 return *this;
513 }
514
515 /**@brief Return the x value */
516 B3_FORCE_INLINE const b3Scalar& getX() const { return m_floats[0]; }
517 /**@brief Return the y value */
518 B3_FORCE_INLINE const b3Scalar& getY() const { return m_floats[1]; }
519 /**@brief Return the z value */
520 B3_FORCE_INLINE const b3Scalar& getZ() const { return m_floats[2]; }
521 /**@brief Return the w value */
522 B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; }
523
524 /**@brief Set the x value */
525 B3_FORCE_INLINE void setX(b3Scalar _x) { m_floats[0] = _x; };
526 /**@brief Set the y value */
527 B3_FORCE_INLINE void setY(b3Scalar _y) { m_floats[1] = _y; };
528 /**@brief Set the z value */
529 B3_FORCE_INLINE void setZ(b3Scalar _z) { m_floats[2] = _z; };
530 /**@brief Set the w value */
531 B3_FORCE_INLINE void setW(b3Scalar _w) { m_floats[3] = _w; };
532
533 //B3_FORCE_INLINE b3Scalar& operator[](int i) { return (&m_floats[0])[i]; }
534 //B3_FORCE_INLINE const b3Scalar& operator[](int i) const { return (&m_floats[0])[i]; }
535 ///operator b3Scalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons.
536 B3_FORCE_INLINE operator b3Scalar*() { return &m_floats[0]; }
537 B3_FORCE_INLINE operator const b3Scalar*() const { return &m_floats[0]; }
538
539 B3_FORCE_INLINE bool operator==(const b3Vector3& other) const
540 {
541 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
542 return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
543 #else
544 return ((m_floats[3] == other.m_floats[3]) &&
545 (m_floats[2] == other.m_floats[2]) &&
546 (m_floats[1] == other.m_floats[1]) &&
547 (m_floats[0] == other.m_floats[0]));
548 #endif
549 }
550
551 B3_FORCE_INLINE bool operator!=(const b3Vector3& other) const
552 {
553 return !(*this == other);
554 }
555
556 /**@brief Set each element to the max of the current values and the values of another b3Vector3
557 * @param other The other b3Vector3 to compare with
558 */
559 B3_FORCE_INLINE void setMax(const b3Vector3& other)
560 {
561 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
562 mVec128 = _mm_max_ps(mVec128, other.mVec128);
563 #elif defined(B3_USE_NEON)
564 mVec128 = vmaxq_f32(mVec128, other.mVec128);
565 #else
566 b3SetMax(m_floats[0], other.m_floats[0]);
567 b3SetMax(m_floats[1], other.m_floats[1]);
568 b3SetMax(m_floats[2], other.m_floats[2]);
569 b3SetMax(m_floats[3], other.m_floats[3]);
570 #endif
571 }
572
573 /**@brief Set each element to the min of the current values and the values of another b3Vector3
574 * @param other The other b3Vector3 to compare with
575 */
576 B3_FORCE_INLINE void setMin(const b3Vector3& other)
577 {
578 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
579 mVec128 = _mm_min_ps(mVec128, other.mVec128);
580 #elif defined(B3_USE_NEON)
581 mVec128 = vminq_f32(mVec128, other.mVec128);
582 #else
583 b3SetMin(m_floats[0], other.m_floats[0]);
584 b3SetMin(m_floats[1], other.m_floats[1]);
585 b3SetMin(m_floats[2], other.m_floats[2]);
586 b3SetMin(m_floats[3], other.m_floats[3]);
587 #endif
588 }
589
590 B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z)
591 {
592 m_floats[0] = _x;
593 m_floats[1] = _y;
594 m_floats[2] = _z;
595 m_floats[3] = b3Scalar(0.f);
596 }
597
598 void getSkewSymmetricMatrix(b3Vector3 * v0, b3Vector3 * v1, b3Vector3 * v2) const
599 {
600 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
601
602 __m128 V = _mm_and_ps(mVec128, b3vFFF0fMask);
603 __m128 V0 = _mm_xor_ps(b3vMzeroMask, V);
604 __m128 V2 = _mm_movelh_ps(V0, V);
605
606 __m128 V1 = _mm_shuffle_ps(V, V0, 0xCE);
607
608 V0 = _mm_shuffle_ps(V0, V, 0xDB);
609 V2 = _mm_shuffle_ps(V2, V, 0xF9);
610
611 v0->mVec128 = V0;
612 v1->mVec128 = V1;
613 v2->mVec128 = V2;
614 #else
615 v0->setValue(0., -getZ(), getY());
616 v1->setValue(getZ(), 0., -getX());
617 v2->setValue(-getY(), getX(), 0.);
618 #endif
619 }
620
621 void setZero()
622 {
623 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
624 mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128);
625 #elif defined(B3_USE_NEON)
626 int32x4_t vi = vdupq_n_s32(0);
627 mVec128 = vreinterpretq_f32_s32(vi);
628 #else
629 setValue(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.));
630 #endif
631 }
632
633 B3_FORCE_INLINE bool isZero() const
634 {
635 return m_floats[0] == b3Scalar(0) && m_floats[1] == b3Scalar(0) && m_floats[2] == b3Scalar(0);
636 }
637
638 B3_FORCE_INLINE bool fuzzyZero() const
639 {
640 return length2() < B3_EPSILON;
641 }
642
643 B3_FORCE_INLINE void serialize(struct b3Vector3Data & dataOut) const;
644
645 B3_FORCE_INLINE void deSerialize(const struct b3Vector3Data& dataIn);
646
647 B3_FORCE_INLINE void serializeFloat(struct b3Vector3FloatData & dataOut) const;
648
649 B3_FORCE_INLINE void deSerializeFloat(const struct b3Vector3FloatData& dataIn);
650
651 B3_FORCE_INLINE void serializeDouble(struct b3Vector3DoubleData & dataOut) const;
652
653 B3_FORCE_INLINE void deSerializeDouble(const struct b3Vector3DoubleData& dataIn);
654
655 /**@brief returns index of maximum dot product between this and vectors in array[]
656 * @param array The other vectors
657 * @param array_count The number of other vectors
658 * @param dotOut The maximum dot product */
659 B3_FORCE_INLINE long maxDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const;
660
661 /**@brief returns index of minimum dot product between this and vectors in array[]
662 * @param array The other vectors
663 * @param array_count The number of other vectors
664 * @param dotOut The minimum dot product */
665 B3_FORCE_INLINE long minDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const;
666
667 /* create a vector as b3Vector3( this->dot( b3Vector3 v0 ), this->dot( b3Vector3 v1), this->dot( b3Vector3 v2 )) */
668 B3_FORCE_INLINE b3Vector3 dot3(const b3Vector3& v0, const b3Vector3& v1, const b3Vector3& v2) const
669 {
670 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
671
672 __m128 a0 = _mm_mul_ps(v0.mVec128, this->mVec128);
673 __m128 a1 = _mm_mul_ps(v1.mVec128, this->mVec128);
674 __m128 a2 = _mm_mul_ps(v2.mVec128, this->mVec128);
675 __m128 b0 = _mm_unpacklo_ps(a0, a1);
676 __m128 b1 = _mm_unpackhi_ps(a0, a1);
677 __m128 b2 = _mm_unpacklo_ps(a2, _mm_setzero_ps());
678 __m128 r = _mm_movelh_ps(b0, b2);
679 r = _mm_add_ps(r, _mm_movehl_ps(b2, b0));
680 a2 = _mm_and_ps(a2, b3vxyzMaskf);
681 r = _mm_add_ps(r, b3CastdTo128f(_mm_move_sd(b3CastfTo128d(a2), b3CastfTo128d(b1))));
682 return b3MakeVector3(r);
683
684 #elif defined(B3_USE_NEON)
685 static const uint32x4_t xyzMask = (const uint32x4_t){-1, -1, -1, 0};
686 float32x4_t a0 = vmulq_f32(v0.mVec128, this->mVec128);
687 float32x4_t a1 = vmulq_f32(v1.mVec128, this->mVec128);
688 float32x4_t a2 = vmulq_f32(v2.mVec128, this->mVec128);
689 float32x2x2_t zLo = vtrn_f32(vget_high_f32(a0), vget_high_f32(a1));
690 a2 = (float32x4_t)vandq_u32((uint32x4_t)a2, xyzMask);
691 float32x2_t b0 = vadd_f32(vpadd_f32(vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0]);
692 float32x2_t b1 = vpadd_f32(vpadd_f32(vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f));
693 return b3Vector3(vcombine_f32(b0, b1));
694 #else
695 return b3MakeVector3(dot(v0), dot(v1), dot(v2));
696 #endif
697 }
698 };
699
700 /**@brief Return the sum of two vectors (Point symantics)*/
701 B3_FORCE_INLINE b3Vector3
702 operator+(const b3Vector3& v1, const b3Vector3& v2)
703 {
704 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
705 return b3MakeVector3(_mm_add_ps(v1.mVec128, v2.mVec128));
706 #elif defined(B3_USE_NEON)
707 return b3MakeVector3(vaddq_f32(v1.mVec128, v2.mVec128));
708 #else
709 return b3MakeVector3(
710 v1.m_floats[0] + v2.m_floats[0],
711 v1.m_floats[1] + v2.m_floats[1],
712 v1.m_floats[2] + v2.m_floats[2]);
713 #endif
714 }
715
716 /**@brief Return the elementwise product of two vectors */
717 B3_FORCE_INLINE b3Vector3
718 operator*(const b3Vector3& v1, const b3Vector3& v2)
719 {
720 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
721 return b3MakeVector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
722 #elif defined(B3_USE_NEON)
723 return b3MakeVector3(vmulq_f32(v1.mVec128, v2.mVec128));
724 #else
725 return b3MakeVector3(
726 v1.m_floats[0] * v2.m_floats[0],
727 v1.m_floats[1] * v2.m_floats[1],
728 v1.m_floats[2] * v2.m_floats[2]);
729 #endif
730 }
731
732 /**@brief Return the difference between two vectors */
733 B3_FORCE_INLINE b3Vector3
734 operator-(const b3Vector3& v1, const b3Vector3& v2)
735 {
736 #if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
737
738 // without _mm_and_ps this code causes slowdown in Concave moving
739 __m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
740 return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask));
741 #elif defined(B3_USE_NEON)
742 float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128);
743 return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask));
744 #else
745 return b3MakeVector3(
746 v1.m_floats[0] - v2.m_floats[0],
747 v1.m_floats[1] - v2.m_floats[1],
748 v1.m_floats[2] - v2.m_floats[2]);
749 #endif
750 }
751
752 /**@brief Return the negative of the vector */
753 B3_FORCE_INLINE b3Vector3
754 operator-(const b3Vector3& v)
755 {
756 #if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
757 __m128 r = _mm_xor_ps(v.mVec128, b3vMzeroMask);
758 return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask));
759 #elif defined(B3_USE_NEON)
760 return b3MakeVector3((b3SimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)b3vMzeroMask));
761 #else
762 return b3MakeVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]);
763 #endif
764 }
765
766 /**@brief Return the vector scaled by s */
767 B3_FORCE_INLINE b3Vector3
768 operator*(const b3Vector3& v, const b3Scalar& s)
769 {
770 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
771 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
772 vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0)
773 return b3MakeVector3(_mm_mul_ps(v.mVec128, vs));
774 #elif defined(B3_USE_NEON)
775 float32x4_t r = vmulq_n_f32(v.mVec128, s);
776 return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask));
777 #else
778 return b3MakeVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s);
779 #endif
780 }
781
782 /**@brief Return the vector scaled by s */
783 B3_FORCE_INLINE b3Vector3
784 operator*(const b3Scalar& s, const b3Vector3& v)
785 {
786 return v * s;
787 }
788
789 /**@brief Return the vector inversely scaled by s */
790 B3_FORCE_INLINE b3Vector3
791 operator/(const b3Vector3& v, const b3Scalar& s)
792 {
793 b3FullAssert(s != b3Scalar(0.0));
794 #if 0 //defined(B3_USE_SSE_IN_API)
795 // this code is not faster !
796 __m128 vs = _mm_load_ss(&s);
797 vs = _mm_div_ss(b3v1110, vs);
798 vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
799
800 return b3Vector3(_mm_mul_ps(v.mVec128, vs));
801 #else
802 return v * (b3Scalar(1.0) / s);
803 #endif
804 }
805
806 /**@brief Return the vector inversely scaled by s */
807 B3_FORCE_INLINE b3Vector3
808 operator/(const b3Vector3& v1, const b3Vector3& v2)
809 {
810 #if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
811 __m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
812 vec = _mm_and_ps(vec, b3vFFF0fMask);
813 return b3MakeVector3(vec);
814 #elif defined(B3_USE_NEON)
815 float32x4_t x, y, v, m;
816
817 x = v1.mVec128;
818 y = v2.mVec128;
819
820 v = vrecpeq_f32(y); // v ~ 1/y
821 m = vrecpsq_f32(y, v); // m = (2-v*y)
822 v = vmulq_f32(v, m); // vv = v*m ~~ 1/y
823 m = vrecpsq_f32(y, v); // mm = (2-vv*y)
824 v = vmulq_f32(v, x); // x*vv
825 v = vmulq_f32(v, m); // (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y
826
827 return b3Vector3(v);
828 #else
829 return b3MakeVector3(
830 v1.m_floats[0] / v2.m_floats[0],
831 v1.m_floats[1] / v2.m_floats[1],
832 v1.m_floats[2] / v2.m_floats[2]);
833 #endif
834 }
835
836 /**@brief Return the dot product between two vectors */
837 B3_FORCE_INLINE b3Scalar
b3Dot(const b3Vector3 & v1,const b3Vector3 & v2)838 b3Dot(const b3Vector3& v1, const b3Vector3& v2)
839 {
840 return v1.dot(v2);
841 }
842
843 /**@brief Return the distance squared between two vectors */
844 B3_FORCE_INLINE b3Scalar
b3Distance2(const b3Vector3 & v1,const b3Vector3 & v2)845 b3Distance2(const b3Vector3& v1, const b3Vector3& v2)
846 {
847 return v1.distance2(v2);
848 }
849
850 /**@brief Return the distance between two vectors */
851 B3_FORCE_INLINE b3Scalar
b3Distance(const b3Vector3 & v1,const b3Vector3 & v2)852 b3Distance(const b3Vector3& v1, const b3Vector3& v2)
853 {
854 return v1.distance(v2);
855 }
856
857 /**@brief Return the angle between two vectors */
858 B3_FORCE_INLINE b3Scalar
b3Angle(const b3Vector3 & v1,const b3Vector3 & v2)859 b3Angle(const b3Vector3& v1, const b3Vector3& v2)
860 {
861 return v1.angle(v2);
862 }
863
864 /**@brief Return the cross product of two vectors */
865 B3_FORCE_INLINE b3Vector3
b3Cross(const b3Vector3 & v1,const b3Vector3 & v2)866 b3Cross(const b3Vector3& v1, const b3Vector3& v2)
867 {
868 return v1.cross(v2);
869 }
870
871 B3_FORCE_INLINE b3Scalar
b3Triple(const b3Vector3 & v1,const b3Vector3 & v2,const b3Vector3 & v3)872 b3Triple(const b3Vector3& v1, const b3Vector3& v2, const b3Vector3& v3)
873 {
874 return v1.triple(v2, v3);
875 }
876
877 /**@brief Return the linear interpolation between two vectors
878 * @param v1 One vector
879 * @param v2 The other vector
880 * @param t The ration of this to v (t = 0 => return v1, t=1 => return v2) */
881 B3_FORCE_INLINE b3Vector3
b3Lerp(const b3Vector3 & v1,const b3Vector3 & v2,const b3Scalar & t)882 b3Lerp(const b3Vector3& v1, const b3Vector3& v2, const b3Scalar& t)
883 {
884 return v1.lerp(v2, t);
885 }
886
distance2(const b3Vector3 & v)887 B3_FORCE_INLINE b3Scalar b3Vector3::distance2(const b3Vector3& v) const
888 {
889 return (v - *this).length2();
890 }
891
distance(const b3Vector3 & v)892 B3_FORCE_INLINE b3Scalar b3Vector3::distance(const b3Vector3& v) const
893 {
894 return (v - *this).length();
895 }
896
normalized()897 B3_FORCE_INLINE b3Vector3 b3Vector3::normalized() const
898 {
899 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
900 b3Vector3 norm = *this;
901
902 return norm.normalize();
903 #else
904 return *this / length();
905 #endif
906 }
907
rotate(const b3Vector3 & wAxis,const b3Scalar _angle)908 B3_FORCE_INLINE b3Vector3 b3Vector3::rotate(const b3Vector3& wAxis, const b3Scalar _angle) const
909 {
910 // wAxis must be a unit lenght vector
911
912 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
913
914 __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
915 b3Scalar ssin = b3Sin(_angle);
916 __m128 C = wAxis.cross(b3MakeVector3(mVec128)).mVec128;
917 O = _mm_and_ps(O, b3vFFF0fMask);
918 b3Scalar scos = b3Cos(_angle);
919
920 __m128 vsin = _mm_load_ss(&ssin); // (S 0 0 0)
921 __m128 vcos = _mm_load_ss(&scos); // (S 0 0 0)
922
923 __m128 Y = b3_pshufd_ps(O, 0xC9); // (Y Z X 0)
924 __m128 Z = b3_pshufd_ps(O, 0xD2); // (Z X Y 0)
925 O = _mm_add_ps(O, Y);
926 vsin = b3_pshufd_ps(vsin, 0x80); // (S S S 0)
927 O = _mm_add_ps(O, Z);
928 vcos = b3_pshufd_ps(vcos, 0x80); // (S S S 0)
929
930 vsin = vsin * C;
931 O = O * wAxis.mVec128;
932 __m128 X = mVec128 - O;
933
934 O = O + vsin;
935 vcos = vcos * X;
936 O = O + vcos;
937
938 return b3MakeVector3(O);
939 #else
940 b3Vector3 o = wAxis * wAxis.dot(*this);
941 b3Vector3 _x = *this - o;
942 b3Vector3 _y;
943
944 _y = wAxis.cross(*this);
945
946 return (o + _x * b3Cos(_angle) + _y * b3Sin(_angle));
947 #endif
948 }
949
maxDot(const b3Vector3 * array,long array_count,b3Scalar & dotOut)950 B3_FORCE_INLINE long b3Vector3::maxDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const
951 {
952 #if defined(B3_USE_SSE) || defined(B3_USE_NEON)
953 #if defined _WIN32 || defined(B3_USE_SSE)
954 const long scalar_cutoff = 10;
955 long b3_maxdot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut);
956 #elif defined B3_USE_NEON
957 const long scalar_cutoff = 4;
958 extern long (*_maxdot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut);
959 #endif
960 if (array_count < scalar_cutoff)
961 #else
962
963 #endif //B3_USE_SSE || B3_USE_NEON
964 {
965 b3Scalar maxDot = -B3_INFINITY;
966 int i = 0;
967 int ptIndex = -1;
968 for (i = 0; i < array_count; i++)
969 {
970 b3Scalar dot = array[i].dot(*this);
971
972 if (dot > maxDot)
973 {
974 maxDot = dot;
975 ptIndex = i;
976 }
977 }
978
979 b3Assert(ptIndex >= 0);
980 if (ptIndex < 0)
981 {
982 ptIndex = 0;
983 }
984 dotOut = maxDot;
985 return ptIndex;
986 }
987 #if defined(B3_USE_SSE) || defined(B3_USE_NEON)
988 return b3_maxdot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut);
989 #endif
990 }
991
minDot(const b3Vector3 * array,long array_count,b3Scalar & dotOut)992 B3_FORCE_INLINE long b3Vector3::minDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const
993 {
994 #if defined(B3_USE_SSE) || defined(B3_USE_NEON)
995 #if defined B3_USE_SSE
996 const long scalar_cutoff = 10;
997 long b3_mindot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut);
998 #elif defined B3_USE_NEON
999 const long scalar_cutoff = 4;
1000 extern long (*b3_mindot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut);
1001 #else
1002 #error unhandled arch!
1003 #endif
1004
1005 if (array_count < scalar_cutoff)
1006 #endif //B3_USE_SSE || B3_USE_NEON
1007 {
1008 b3Scalar minDot = B3_INFINITY;
1009 int i = 0;
1010 int ptIndex = -1;
1011
1012 for (i = 0; i < array_count; i++)
1013 {
1014 b3Scalar dot = array[i].dot(*this);
1015
1016 if (dot < minDot)
1017 {
1018 minDot = dot;
1019 ptIndex = i;
1020 }
1021 }
1022
1023 dotOut = minDot;
1024
1025 return ptIndex;
1026 }
1027 #if defined(B3_USE_SSE) || defined(B3_USE_NEON)
1028 return b3_mindot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut);
1029 #endif
1030 }
1031
1032 class b3Vector4 : public b3Vector3
1033 {
1034 public:
absolute4()1035 B3_FORCE_INLINE b3Vector4 absolute4() const
1036 {
1037 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
1038 return b3MakeVector4(_mm_and_ps(mVec128, b3vAbsfMask));
1039 #elif defined(B3_USE_NEON)
1040 return b3Vector4(vabsq_f32(mVec128));
1041 #else
1042 return b3MakeVector4(
1043 b3Fabs(m_floats[0]),
1044 b3Fabs(m_floats[1]),
1045 b3Fabs(m_floats[2]),
1046 b3Fabs(m_floats[3]));
1047 #endif
1048 }
1049
getW()1050 b3Scalar getW() const { return m_floats[3]; }
1051
maxAxis4()1052 B3_FORCE_INLINE int maxAxis4() const
1053 {
1054 int maxIndex = -1;
1055 b3Scalar maxVal = b3Scalar(-B3_LARGE_FLOAT);
1056 if (m_floats[0] > maxVal)
1057 {
1058 maxIndex = 0;
1059 maxVal = m_floats[0];
1060 }
1061 if (m_floats[1] > maxVal)
1062 {
1063 maxIndex = 1;
1064 maxVal = m_floats[1];
1065 }
1066 if (m_floats[2] > maxVal)
1067 {
1068 maxIndex = 2;
1069 maxVal = m_floats[2];
1070 }
1071 if (m_floats[3] > maxVal)
1072 {
1073 maxIndex = 3;
1074 }
1075
1076 return maxIndex;
1077 }
1078
minAxis4()1079 B3_FORCE_INLINE int minAxis4() const
1080 {
1081 int minIndex = -1;
1082 b3Scalar minVal = b3Scalar(B3_LARGE_FLOAT);
1083 if (m_floats[0] < minVal)
1084 {
1085 minIndex = 0;
1086 minVal = m_floats[0];
1087 }
1088 if (m_floats[1] < minVal)
1089 {
1090 minIndex = 1;
1091 minVal = m_floats[1];
1092 }
1093 if (m_floats[2] < minVal)
1094 {
1095 minIndex = 2;
1096 minVal = m_floats[2];
1097 }
1098 if (m_floats[3] < minVal)
1099 {
1100 minIndex = 3;
1101 minVal = m_floats[3];
1102 }
1103
1104 return minIndex;
1105 }
1106
closestAxis4()1107 B3_FORCE_INLINE int closestAxis4() const
1108 {
1109 return absolute4().maxAxis4();
1110 }
1111
1112 /**@brief Set x,y,z and zero w
1113 * @param x Value of x
1114 * @param y Value of y
1115 * @param z Value of z
1116 */
1117
1118 /* void getValue(b3Scalar *m) const
1119 {
1120 m[0] = m_floats[0];
1121 m[1] = m_floats[1];
1122 m[2] =m_floats[2];
1123 }
1124 */
1125 /**@brief Set the values
1126 * @param x Value of x
1127 * @param y Value of y
1128 * @param z Value of z
1129 * @param w Value of w
1130 */
setValue(const b3Scalar & _x,const b3Scalar & _y,const b3Scalar & _z,const b3Scalar & _w)1131 B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w)
1132 {
1133 m_floats[0] = _x;
1134 m_floats[1] = _y;
1135 m_floats[2] = _z;
1136 m_floats[3] = _w;
1137 }
1138 };
1139
1140 ///b3SwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
b3SwapScalarEndian(const b3Scalar & sourceVal,b3Scalar & destVal)1141 B3_FORCE_INLINE void b3SwapScalarEndian(const b3Scalar& sourceVal, b3Scalar& destVal)
1142 {
1143 #ifdef B3_USE_DOUBLE_PRECISION
1144 unsigned char* dest = (unsigned char*)&destVal;
1145 unsigned char* src = (unsigned char*)&sourceVal;
1146 dest[0] = src[7];
1147 dest[1] = src[6];
1148 dest[2] = src[5];
1149 dest[3] = src[4];
1150 dest[4] = src[3];
1151 dest[5] = src[2];
1152 dest[6] = src[1];
1153 dest[7] = src[0];
1154 #else
1155 unsigned char* dest = (unsigned char*)&destVal;
1156 unsigned char* src = (unsigned char*)&sourceVal;
1157 dest[0] = src[3];
1158 dest[1] = src[2];
1159 dest[2] = src[1];
1160 dest[3] = src[0];
1161 #endif //B3_USE_DOUBLE_PRECISION
1162 }
1163 ///b3SwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
b3SwapVector3Endian(const b3Vector3 & sourceVec,b3Vector3 & destVec)1164 B3_FORCE_INLINE void b3SwapVector3Endian(const b3Vector3& sourceVec, b3Vector3& destVec)
1165 {
1166 for (int i = 0; i < 4; i++)
1167 {
1168 b3SwapScalarEndian(sourceVec[i], destVec[i]);
1169 }
1170 }
1171
1172 ///b3UnSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
b3UnSwapVector3Endian(b3Vector3 & vector)1173 B3_FORCE_INLINE void b3UnSwapVector3Endian(b3Vector3& vector)
1174 {
1175 b3Vector3 swappedVec;
1176 for (int i = 0; i < 4; i++)
1177 {
1178 b3SwapScalarEndian(vector[i], swappedVec[i]);
1179 }
1180 vector = swappedVec;
1181 }
1182
1183 template <class T>
b3PlaneSpace1(const T & n,T & p,T & q)1184 B3_FORCE_INLINE void b3PlaneSpace1(const T& n, T& p, T& q)
1185 {
1186 if (b3Fabs(n[2]) > B3_SQRT12)
1187 {
1188 // choose p in y-z plane
1189 b3Scalar a = n[1] * n[1] + n[2] * n[2];
1190 b3Scalar k = b3RecipSqrt(a);
1191 p[0] = 0;
1192 p[1] = -n[2] * k;
1193 p[2] = n[1] * k;
1194 // set q = n x p
1195 q[0] = a * k;
1196 q[1] = -n[0] * p[2];
1197 q[2] = n[0] * p[1];
1198 }
1199 else
1200 {
1201 // choose p in x-y plane
1202 b3Scalar a = n[0] * n[0] + n[1] * n[1];
1203 b3Scalar k = b3RecipSqrt(a);
1204 p[0] = -n[1] * k;
1205 p[1] = n[0] * k;
1206 p[2] = 0;
1207 // set q = n x p
1208 q[0] = -n[2] * p[1];
1209 q[1] = n[2] * p[0];
1210 q[2] = a * k;
1211 }
1212 }
1213
///b3Vector3FloatData is the single-precision serialization record for a vector:
///four consecutive floats, filled by serializeFloat and read by deSerializeFloat.
struct b3Vector3FloatData
{
	float m_floats[4];
};
1218
///b3Vector3DoubleData is the double-precision serialization record for a vector:
///four consecutive doubles, filled by serializeDouble and read by deSerializeDouble.
struct b3Vector3DoubleData
{
	double m_floats[4];
};
1223
serializeFloat(struct b3Vector3FloatData & dataOut)1224 B3_FORCE_INLINE void b3Vector3::serializeFloat(struct b3Vector3FloatData& dataOut) const
1225 {
1226 ///could also do a memcpy, check if it is worth it
1227 for (int i = 0; i < 4; i++)
1228 dataOut.m_floats[i] = float(m_floats[i]);
1229 }
1230
deSerializeFloat(const struct b3Vector3FloatData & dataIn)1231 B3_FORCE_INLINE void b3Vector3::deSerializeFloat(const struct b3Vector3FloatData& dataIn)
1232 {
1233 for (int i = 0; i < 4; i++)
1234 m_floats[i] = b3Scalar(dataIn.m_floats[i]);
1235 }
1236
serializeDouble(struct b3Vector3DoubleData & dataOut)1237 B3_FORCE_INLINE void b3Vector3::serializeDouble(struct b3Vector3DoubleData& dataOut) const
1238 {
1239 ///could also do a memcpy, check if it is worth it
1240 for (int i = 0; i < 4; i++)
1241 dataOut.m_floats[i] = double(m_floats[i]);
1242 }
1243
deSerializeDouble(const struct b3Vector3DoubleData & dataIn)1244 B3_FORCE_INLINE void b3Vector3::deSerializeDouble(const struct b3Vector3DoubleData& dataIn)
1245 {
1246 for (int i = 0; i < 4; i++)
1247 m_floats[i] = b3Scalar(dataIn.m_floats[i]);
1248 }
1249
serialize(struct b3Vector3Data & dataOut)1250 B3_FORCE_INLINE void b3Vector3::serialize(struct b3Vector3Data& dataOut) const
1251 {
1252 ///could also do a memcpy, check if it is worth it
1253 for (int i = 0; i < 4; i++)
1254 dataOut.m_floats[i] = m_floats[i];
1255 }
1256
deSerialize(const struct b3Vector3Data & dataIn)1257 B3_FORCE_INLINE void b3Vector3::deSerialize(const struct b3Vector3Data& dataIn)
1258 {
1259 for (int i = 0; i < 4; i++)
1260 m_floats[i] = dataIn.m_floats[i];
1261 }
1262
b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z)1263 inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z)
1264 {
1265 b3Vector3 tmp;
1266 tmp.setValue(x, y, z);
1267 return tmp;
1268 }
1269
b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z,b3Scalar w)1270 inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w)
1271 {
1272 b3Vector3 tmp;
1273 tmp.setValue(x, y, z);
1274 tmp.w = w;
1275 return tmp;
1276 }
1277
b3MakeVector4(b3Scalar x,b3Scalar y,b3Scalar z,b3Scalar w)1278 inline b3Vector4 b3MakeVector4(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w)
1279 {
1280 b3Vector4 tmp;
1281 tmp.setValue(x, y, z, w);
1282 return tmp;
1283 }
1284
1285 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
1286
b3MakeVector3(b3SimdFloat4 v)1287 inline b3Vector3 b3MakeVector3(b3SimdFloat4 v)
1288 {
1289 b3Vector3 tmp;
1290 tmp.set128(v);
1291 return tmp;
1292 }
1293
b3MakeVector4(b3SimdFloat4 vec)1294 inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec)
1295 {
1296 b3Vector4 tmp;
1297 tmp.set128(vec);
1298 return tmp;
1299 }
1300
1301 #endif
1302
1303 #endif //B3_VECTOR3_H
1304