1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 Paul Lemire <paul.lemire350@gmail.com>
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the Qt3D module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39 
40 #ifndef QT3DCORE_VECTOR4D_SSE_P_H
41 #define QT3DCORE_VECTOR4D_SSE_P_H
42 
43 //
44 //  W A R N I N G
45 //  -------------
46 //
47 // This file is not part of the Qt3D API.  It exists purely as an
48 // implementation detail.  This header file may change from version to
49 // version without notice, or even be removed.
50 //
51 // We mean it.
52 //
53 
54 #include <Qt3DCore/private/vector3d_p.h>
55 #include <QtGui/qvector4d.h>
56 
57 #ifdef QT_COMPILER_SUPPORTS_SSE2
58 
59 QT_BEGIN_NAMESPACE
60 
61 namespace Qt3DCore {
62 
63 class Matrix4x4_SSE;
64 class Matrix4x4_AVX2;
65 
66 class Vector4D_SSE
67 {
68 public:
Vector4D_SSE()69     Q_ALWAYS_INLINE Vector4D_SSE()
70         : m_xyzw(_mm_setzero_ps())
71     {
72     }
73 
Vector4D_SSE(Qt::Initialization)74     explicit Q_ALWAYS_INLINE Vector4D_SSE(Qt::Initialization) {}
75 
Vector4D_SSE(float x,float y,float z,float w)76     explicit Q_ALWAYS_INLINE Vector4D_SSE(float x, float y, float z, float w)
77         : m_xyzw(_mm_set_ps(w, z, y, x))
78     {
79     }
80 
Vector4D_SSE(QVector4D v)81     explicit Q_ALWAYS_INLINE Vector4D_SSE(QVector4D v)
82         : m_xyzw(_mm_set_ps(v.w(), v.z(), v.y(), v.x()))
83     {
84     }
85 
86     explicit Q_ALWAYS_INLINE Vector4D_SSE(const Vector3D_SSE &vec3, float w = 0.0f)
87         : m_xyzw(vec3.m_xyzw)
88     {
89         setW(w);
90     }
91 
92     explicit Q_ALWAYS_INLINE Vector4D_SSE(QVector3D v, float w = 0.0f)
93         : m_xyzw(_mm_set_ps(w, v.z(), v.y(), v.x()))
94     {
95     }
96 
97     Q_ALWAYS_INLINE Vector4D_SSE &operator+=(Vector4D_SSE vector)
98     {
99         m_xyzw = _mm_add_ps(m_xyzw, vector.m_xyzw);
100         return *this;
101     }
102 
103     Q_ALWAYS_INLINE Vector4D_SSE &operator-=(Vector4D_SSE vector)
104     {
105         m_xyzw = _mm_sub_ps(m_xyzw, vector.m_xyzw);
106         return *this;
107     }
108 
109     Q_ALWAYS_INLINE Vector4D_SSE &operator*=(Vector4D_SSE vector)
110     {
111         m_xyzw = _mm_mul_ps(m_xyzw, vector.m_xyzw);
112         return *this;
113     }
114 
115     Q_ALWAYS_INLINE Vector4D_SSE &operator/=(Vector4D_SSE vector)
116     {
117         m_xyzw = _mm_div_ps(m_xyzw, vector.m_xyzw);
118         return *this;
119     }
120 
121     Q_ALWAYS_INLINE Vector4D_SSE &operator*=(float factor)
122     {
123         m_xyzw = _mm_mul_ps(m_xyzw, _mm_set1_ps(factor));
124         return *this;
125     }
126 
127     Q_ALWAYS_INLINE Vector4D_SSE &operator/=(float factor)
128     {
129         m_xyzw = _mm_div_ps(m_xyzw, _mm_set1_ps(factor));
130         return *this;
131     }
132 
133     Q_ALWAYS_INLINE bool operator==(Vector4D_SSE other) const
134     {
135         // 0b1111 == 0xf
136         return (_mm_movemask_ps(_mm_cmpeq_ps(m_xyzw, other.m_xyzw)) == 0xf);
137     }
138 
139     Q_ALWAYS_INLINE bool operator!=(Vector4D_SSE other) const
140     {
141         return !(*this == other);
142     }
143 
toQVector4D()144     Q_ALWAYS_INLINE QVector4D toQVector4D() const
145     {
146         return QVector4D(x(), y(), z(), w());
147     }
148 
149     // TODO: Uncomment when we introduce Vector3D_SSE
150     //Q_ALWAYS_INLINE Vector3D_SSE toVector3D() const { return Vector3D_SSE(*this); }
151 
lengthSquared()152     Q_ALWAYS_INLINE float lengthSquared() const
153     {
154         return dotProduct(*this, *this);
155     }
156 
length()157     Q_ALWAYS_INLINE float length() const
158     {
159         return sqrt(dotProduct(*this, *this));
160     }
161 
normalize()162     Q_ALWAYS_INLINE void normalize()
163     {
164         const float len = length();
165         m_xyzw = _mm_div_ps(m_xyzw, _mm_set_ps1(len));
166     }
167 
normalized()168     Q_ALWAYS_INLINE Vector4D_SSE normalized() const
169     {
170         Vector4D_SSE v = *this;
171         v.normalize();
172         return v;
173     }
174 
isNull()175     Q_ALWAYS_INLINE bool isNull() const
176     {
177         // 0b1111 == 0xf
178         return _mm_movemask_ps(_mm_cmpeq_ps(m_xyzw, _mm_setzero_ps())) == 0xf;
179     }
180 
x()181     Q_ALWAYS_INLINE float x() const { return _mm_cvtss_f32(m_xyzw); }
182 
y()183     Q_ALWAYS_INLINE float y() const
184     {
185         // 0b01010101 = 0x55
186         return _mm_cvtss_f32(_mm_shuffle_ps(m_xyzw, m_xyzw, 0x55));
187     }
188 
z()189     Q_ALWAYS_INLINE float z() const
190     {
191         // 0b10101010 = 0xaa
192         return _mm_cvtss_f32(_mm_unpackhi_ps(m_xyzw, m_xyzw));
193     }
194 
w()195     Q_ALWAYS_INLINE float w() const
196     {
197         // 0b11111111 = 0xff
198         return _mm_cvtss_f32(_mm_shuffle_ps(m_xyzw, m_xyzw, 0xff));
199     }
200 
setX(float x)201     Q_ALWAYS_INLINE void setX(float x)
202     {
203         m_xyzw = _mm_move_ss(m_xyzw, _mm_set_ss(x));
204     }
205 
setY(float y)206     Q_ALWAYS_INLINE void setY(float y)
207     {
208         // m_xyzw = a, b, c, d
209 
210         // y, y, y, y
211         const __m128 yVec = _mm_set_ps1(y);
212 
213         // y, y, a, a
214         // 0b00000000 == 0x0
215         const __m128 yaVec = _mm_shuffle_ps(yVec, m_xyzw, 0x0);
216 
217         // a, y, c, d
218         // 0b11100010 == 0xe2
219         m_xyzw = _mm_shuffle_ps(yaVec, m_xyzw, 0xe2);
220     }
221 
setZ(float z)222     Q_ALWAYS_INLINE void setZ(float z)
223     {
224         // m_xyzw = a, b, c, d
225 
226         // z, z, z, z
227         const __m128 zVec = _mm_set_ps1(z);
228 
229         // z, z, d, d
230         // 0b11110000 == 0xf0
231         const __m128 zdVec = _mm_shuffle_ps(zVec, m_xyzw, 0xf0);
232 
233         // a, b, z, d
234         // 0b10000100 == 0x84
235         m_xyzw = _mm_shuffle_ps(m_xyzw, zdVec, 0x84);
236     }
237 
setW(float w)238     Q_ALWAYS_INLINE void setW(float w)
239     {
240 #ifdef __SSE4_1__
241         const __m128 wVec = _mm_set_ss(w);
242         // insert element 0 of wVec into position 3 in vec3, don't zero anything
243         m_xyzw = _mm_insert_ps(m_xyzw, wVec, 0x30);
244 #else
245         // m_xyzw = a, b, c, d
246 
247         // w, w, w, w
248         const __m128 wVec = _mm_set_ps1(w);
249 
250         // c, c, w, w
251         const __m128 cwVec = _mm_shuffle_ps(m_xyzw, wVec, _MM_SHUFFLE(0, 0, 2, 2));
252 
253         // a, b, c, w
254         m_xyzw = _mm_shuffle_ps(m_xyzw, cwVec, _MM_SHUFFLE(2, 0, 1, 0));
255 #endif
256     }
257 
258     Q_ALWAYS_INLINE float operator[](int idx) const
259     {
260         Q_DECL_ALIGN(16) float vec[4];
261         _mm_store_ps(vec, m_xyzw);
262         return vec[idx];
263     }
264 
265     struct DigitWrapper
266     {
DigitWrapperDigitWrapper267         explicit DigitWrapper(int idx, Vector4D_SSE *vec)
268             : m_vec(vec)
269             , m_idx(idx)
270         {}
271 
272         operator float() const
273         {
274             switch (m_idx) {
275             case 0:
276                 return m_vec->x();
277             case 1:
278                 return m_vec->y();
279             case 2:
280                 return m_vec->z();
281             case 3:
282                 return m_vec->w();
283             default:
284                 Q_UNREACHABLE();
285                 return 0.0f;
286             }
287         }
288         void operator =(float value)
289         {
290             switch (m_idx) {
291             case 0:
292                 m_vec->setX(value);
293                 break;
294             case 1:
295                 m_vec->setY(value);
296                 break;
297             case 2:
298                 m_vec->setZ(value);
299                 break;
300             case 3:
301                 m_vec->setW(value);
302                 break;
303             default:
304                 Q_UNREACHABLE();
305             }
306         }
307 
308     private:
309         Vector4D_SSE *m_vec;
310         const int m_idx;
311     };
312 
313     Q_ALWAYS_INLINE DigitWrapper operator[](int idx)
314     {
315         return DigitWrapper(idx, this);
316     }
317 
dotProduct(Vector4D_SSE a,Vector4D_SSE b)318     static Q_ALWAYS_INLINE float dotProduct(Vector4D_SSE a, Vector4D_SSE b)
319     {
320 #if defined(__SSE4_1__)
321         // 0b11111111 = 0xff
322         return _mm_cvtss_f32(_mm_dp_ps(a.m_xyzw, b.m_xyzw, 0xff));
323 #elif defined(__SSE3__)
324         const __m128 mult = _mm_mul_ps(a.m_xyzw, b.m_xyzw);
325         // a + b, c + d, a + d, c + d
326         const __m128 partialSum = _mm_hadd_ps(mult, mult);
327         // c + d, ......
328         // 0x00000001 =
329         const __m128 partialSumShuffle = _mm_shuffle_ps(partialSum, partialSum, 0x1);
330         return _mm_cvtss_f32(_mm_hadd_ps(partialSum, partialSumShuffle));
331 #else
332         const __m128 mult = _mm_mul_ps(a.m_xyzw, b.m_xyzw);
333         // (multX, multY, 0, 0) + (multZ, multW, 0, 0) -> (multX + multZ, multY + multW, 0, 0)
334         // 0b00001110 == 0xe
335         const __m128 shuffled = _mm_shuffle_ps(mult, mult, 0xe);
336         __m128 result = _mm_add_ps(shuffled, mult);
337         // (multX + multZ, 0, 0, 0) + (multY + multW, 0, 0, 0);
338         // 0b00000001 == 0x1
339         const __m128 shuffled2 = _mm_shuffle_ps(result, result, 0x1);
340         result = _mm_add_ps(result, shuffled2);
341         return _mm_cvtss_f32(result);
342 #endif
343     }
344 
345     friend class Matrix4x4_SSE;
346 
347 #ifdef __AVX2__
348     friend class Matrix4x4_AVX2;
349     friend Vector4D_SSE operator*(const Vector4D_SSE &vector, const Matrix4x4_AVX2 &matrix);
350     friend Vector4D_SSE operator*(const Matrix4x4_AVX2 &matrix, const Vector4D_SSE &vector);
351 #endif
352 
353     friend class Vector3D_SSE;
354     friend Vector4D_SSE operator*(const Vector4D_SSE &vector, const Matrix4x4_SSE &matrix);
355     friend Vector4D_SSE operator*(const Matrix4x4_SSE  &matrix, const Vector4D_SSE &vector);
356 
357     friend Q_ALWAYS_INLINE const Vector4D_SSE operator+(Vector4D_SSE v1, Vector4D_SSE v2) { return v1 += v2; }
358     friend Q_ALWAYS_INLINE const Vector4D_SSE operator-(Vector4D_SSE v1, Vector4D_SSE v2) { return v1 -= v2; }
359     friend Q_ALWAYS_INLINE const Vector4D_SSE operator*(float factor, Vector4D_SSE vector) { return vector *= factor; }
360     friend Q_ALWAYS_INLINE const Vector4D_SSE operator*(Vector4D_SSE vector, float factor) { return vector *= factor; }
361     friend Q_ALWAYS_INLINE const Vector4D_SSE operator*(Vector4D_SSE v1, Vector4D_SSE v2) { return v1 *= v2; }
362     friend Q_ALWAYS_INLINE const Vector4D_SSE operator-(Vector4D_SSE vector)
363     {
364         Vector4D_SSE c(Qt::Uninitialized);
365 
366         c.m_xyzw = _mm_xor_ps(vector.m_xyzw, _mm_set1_ps(-0.0f));
367 
368         return c;
369     }
370 
371     friend Q_ALWAYS_INLINE const Vector4D_SSE operator/(Vector4D_SSE vector, float divisor) { return vector /= divisor; }
372     friend Q_ALWAYS_INLINE const Vector4D_SSE operator/(Vector4D_SSE vector, Vector4D_SSE divisor) { return vector /= divisor; }
373 
374     friend Q_3DCORE_PRIVATE_EXPORT QDebug operator<<(QDebug dbg, const Vector4D_SSE &v);
qFuzzyCompare(const Vector4D_SSE & v1,const Vector4D_SSE & v2)375     friend Q_ALWAYS_INLINE bool qFuzzyCompare(const Vector4D_SSE& v1, const Vector4D_SSE& v2)
376     {
377         return ::qFuzzyCompare(v1.x(), v2.x()) &&
378                ::qFuzzyCompare(v1.y(), v2.y()) &&
379                ::qFuzzyCompare(v1.z(), v2.z()) &&
380                ::qFuzzyCompare(v1.w(), v2.w());
381     }
382 
383 private:
384     // Q_DECL_ALIGN(16) float m[4];// for SSE support
385     __m128 m_xyzw;
386 };
387 
388 } // Qt3DCore
389 
390 Q_DECLARE_TYPEINFO(Qt3DCore::Vector4D_SSE, Q_PRIMITIVE_TYPE);
391 
392 QT_END_NAMESPACE
393 
394 Q_DECLARE_METATYPE(Qt3DCore::Vector4D_SSE)
395 
396 #endif // QT_COMPILER_SUPPORTS_SSE2
397 
398 #endif // QT3DCORE_VECTOR4D_SSE_P_H
399