1 /********************************************************************************
2 * *
3 * D o u b l e - P r e c i s i o n 3 x 3 M a t r i x *
4 * *
5 *********************************************************************************
6 * Copyright (C) 2003,2021 by Jeroen van der Zijp. All Rights Reserved. *
7 *********************************************************************************
8 * This library is free software; you can redistribute it and/or modify *
9 * it under the terms of the GNU Lesser General Public License as published by *
10 * the Free Software Foundation; either version 3 of the License, or *
11 * (at your option) any later version. *
12 * *
13 * This library is distributed in the hope that it will be useful, *
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16 * GNU Lesser General Public License for more details. *
17 * *
18 * You should have received a copy of the GNU Lesser General Public License *
19 * along with this program. If not, see <http://www.gnu.org/licenses/> *
20 ********************************************************************************/
21 #include "xincs.h"
22 #include "fxver.h"
23 #include "fxdefs.h"
24 #include "fxmath.h"
25 #include "FXArray.h"
26 #include "FXHash.h"
27 #include "FXStream.h"
28 #include "FXObject.h"
29 #include "FXVec2d.h"
30 #include "FXVec3d.h"
31 #include "FXVec4d.h"
32 #include "FXQuatd.h"
33 #include "FXMat2d.h"
34 #include "FXMat3d.h"
35 #include "FXMat4d.h"
36
37
38 /*
39 Notes:
40 - Transformations pre-multiply.
41 - Goal is same effect as OpenGL.
42 */
43
44
45 using namespace FX;
46
47 /*******************************************************************************/
48
49 namespace FX {
50
51 // Mask bottom 3 elements
52 #define MMM _mm256_set_epi64x(0,~0,~0,~0)
53
54 // More palatable syntax
55 #define _mm256_storeu_sd(p,x) _mm_storel_pd(p,_mm256_castpd256_pd128(x))
56 #define _mm256_loadu_sd(p) _mm256_castpd128_pd256(_mm_load_sd(p))
57
58 #define _mm256_store_sd(p,x) _mm_storel_pd(p,_mm256_castpd256_pd128(x))
59 #define _mm256_load_sd(p) _mm256_castpd128_pd256(_mm_load_sd(p))
60
61 #define _mm256_div_sd(a,b) _mm256_castpd128_pd256(_mm_div_sd(_mm256_castpd256_pd128(a),_mm256_castpd256_pd128(b)))
62 #define _mm256_mul_sd(a,b) _mm256_castpd128_pd256(_mm_mul_sd(_mm256_castpd256_pd128(a),_mm256_castpd256_pd128(b)))
63 #define _mm256_add_sd(a,b) _mm256_castpd128_pd256(_mm_add_sd(_mm256_castpd256_pd128(a),_mm256_castpd256_pd128(b)))
64 #define _mm256_sub_sd(a,b) _mm256_castpd128_pd256(_mm_sub_sd(_mm256_castpd256_pd128(a),_mm256_castpd256_pd128(b)))
65
66 #define _mm_storeu_sd(p,x) _mm_store_sd(p,x)
67 #define _mm_loadu_sd(p) _mm_load_sd(p)
68
69 // Initialize matrix from scalar
FXMat3d(FXdouble s)70 FXMat3d::FXMat3d(FXdouble s){
71 #if defined(FOX_HAS_AVX)
72 _mm256_storeu_pd(&m[0][0],_mm256_set1_pd(s));
73 _mm256_storeu_pd(&m[1][1],_mm256_set1_pd(s));
74 _mm256_storeu_sd(&m[1][1],_mm256_set1_pd(s));
75 #elif defined(FOX_HAS_SSE2)
76 _mm_storeu_pd(&m[0][0],_mm_set1_pd(s));
77 _mm_storeu_pd(&m[0][2],_mm_set1_pd(s));
78 _mm_storeu_pd(&m[1][1],_mm_set1_pd(s));
79 _mm_storeu_pd(&m[2][0],_mm_set1_pd(s));
80 _mm_storeu_sd(&m[2][2],_mm_set1_pd(s));
81 #else
82 m[0][0]=s; m[0][1]=s; m[0][2]=s;
83 m[1][0]=s; m[1][1]=s; m[1][2]=s;
84 m[2][0]=s; m[2][1]=s; m[2][2]=s;
85 #endif
86 }
87
88
89 // Initialize matrix from another matrix
FXMat3d(const FXMat2d & s)90 FXMat3d::FXMat3d(const FXMat2d& s){
91 m[0][0]=s[0][0]; m[0][1]=s[0][1]; m[0][2]=0.0;
92 m[1][0]=s[1][0]; m[1][1]=s[1][1]; m[1][2]=0.0;
93 m[2][0]=0.0; m[2][1]=0.0; m[2][2]=1.0;
94 }
95
96
97 // Initialize matrix from another matrix
FXMat3d(const FXMat3d & s)98 FXMat3d::FXMat3d(const FXMat3d& s){
99 #if defined(FOX_HAS_AVX)
100 _mm256_storeu_pd(&m[0][0],_mm256_loadu_pd(&s[0][0]));
101 _mm256_storeu_pd(&m[1][1],_mm256_loadu_pd(&s[1][1]));
102 _mm256_storeu_sd(&m[2][2],_mm256_loadu_sd(&s[2][2]));
103 #elif defined(FOX_HAS_SSE2)
104 _mm_storeu_pd(&m[0][0],_mm_loadu_pd(&s[0][0]));
105 _mm_storeu_pd(&m[0][2],_mm_loadu_pd(&s[0][2]));
106 _mm_storeu_pd(&m[1][1],_mm_loadu_pd(&s[1][1]));
107 _mm_storeu_pd(&m[2][0],_mm_loadu_pd(&s[2][0]));
108 _mm_storeu_sd(&m[2][2],_mm_loadu_sd(&s[2][2]));
109 #else
110 m[0][0]=s[0][0]; m[0][1]=s[0][1]; m[0][2]=s[0][2];
111 m[1][0]=s[1][0]; m[1][1]=s[1][1]; m[1][2]=s[1][2];
112 m[2][0]=s[2][0]; m[2][1]=s[2][1]; m[2][2]=s[2][2];
113 #endif
114 }
115
116
117 // Initialize from rotation and scaling part of 4x4 matrix
FXMat3d(const FXMat4d & s)118 FXMat3d::FXMat3d(const FXMat4d& s){
119 #if defined(FOX_HAS_SSE2)
120 _mm_storeu_pd(&m[0][0],_mm_loadu_pd(&s[0][0])); _mm_store_sd(&m[0][2],_mm_load_sd(&s[0][2]));
121 _mm_storeu_pd(&m[1][0],_mm_loadu_pd(&s[1][0])); _mm_store_sd(&m[1][2],_mm_load_sd(&s[1][2]));
122 _mm_storeu_pd(&m[2][0],_mm_loadu_pd(&s[2][0])); _mm_store_sd(&m[2][2],_mm_load_sd(&s[2][2]));
123 #else
124 m[0][0]=s[0][0]; m[0][1]=s[0][1]; m[0][2]=s[0][2];
125 m[1][0]=s[1][0]; m[1][1]=s[1][1]; m[1][2]=s[1][2];
126 m[2][0]=s[2][0]; m[2][1]=s[2][1]; m[2][2]=s[2][2];
127 #endif
128 }
129
130
131 // Initialize matrix from array
FXMat3d(const FXdouble s[])132 FXMat3d::FXMat3d(const FXdouble s[]){
133 #if defined(FOX_HAS_AVX)
134 _mm256_storeu_pd(&m[0][0],_mm256_loadu_pd(&s[0]));
135 _mm256_storeu_pd(&m[1][1],_mm256_loadu_pd(&s[4]));
136 _mm256_storeu_sd(&m[2][2],_mm256_loadu_sd(&s[8]));
137 #elif defined(FOX_HAS_SSE2)
138 _mm_storeu_pd(&m[0][0],_mm_loadu_pd(&s[0]));
139 _mm_storeu_pd(&m[0][2],_mm_loadu_pd(&s[2]));
140 _mm_storeu_pd(&m[1][1],_mm_loadu_pd(&s[4]));
141 _mm_storeu_pd(&m[2][0],_mm_loadu_pd(&s[6]));
142 _mm_storeu_sd(&m[2][2],_mm_loadu_sd(&s[8]));
143 #else
144 m[0][0]=s[0]; m[0][1]=s[1]; m[0][2]=s[2];
145 m[1][0]=s[3]; m[1][1]=s[4]; m[1][2]=s[5];
146 m[2][0]=s[6]; m[2][1]=s[7]; m[2][2]=s[8];
147 #endif
148 }
149
150
151 // Initialize diagonal matrix
FXMat3d(FXdouble a,FXdouble b,FXdouble c)152 FXMat3d::FXMat3d(FXdouble a,FXdouble b,FXdouble c){
153 m[0][0]=a; m[0][1]=0.0; m[0][2]=0.0;
154 m[1][0]=0.0; m[1][1]=b; m[1][2]=0.0;
155 m[2][0]=0.0; m[2][1]=0.0; m[2][2]=c;
156 }
157
158
159 // Initialize matrix from components
FXMat3d(FXdouble a00,FXdouble a01,FXdouble a02,FXdouble a10,FXdouble a11,FXdouble a12,FXdouble a20,FXdouble a21,FXdouble a22)160 FXMat3d::FXMat3d(FXdouble a00,FXdouble a01,FXdouble a02,FXdouble a10,FXdouble a11,FXdouble a12,FXdouble a20,FXdouble a21,FXdouble a22){
161 m[0][0]=a00; m[0][1]=a01; m[0][2]=a02;
162 m[1][0]=a10; m[1][1]=a11; m[1][2]=a12;
163 m[2][0]=a20; m[2][1]=a21; m[2][2]=a22;
164 }
165
166
167 // Initialize matrix from three vectors
FXMat3d(const FXVec3d & a,const FXVec3d & b,const FXVec3d & c)168 FXMat3d::FXMat3d(const FXVec3d& a,const FXVec3d& b,const FXVec3d& c){
169 #if defined(FOX_HAS_SSE2)
170 _mm_storeu_pd(&m[0][0],_mm_loadu_pd(&a[0])); _mm_store_sd(&m[0][2],_mm_load_sd(&a[2]));
171 _mm_storeu_pd(&m[1][0],_mm_loadu_pd(&b[0])); _mm_store_sd(&m[1][2],_mm_load_sd(&b[2]));
172 _mm_storeu_pd(&m[2][0],_mm_loadu_pd(&c[0])); _mm_store_sd(&m[2][2],_mm_load_sd(&c[2]));
173 #else
174 m[0]=a;
175 m[1]=b;
176 m[2]=c;
177 #endif
178 }
179
180
181 // Initialize matrix from quaternion
FXMat3d(const FXQuatd & quat)182 FXMat3d::FXMat3d(const FXQuatd& quat){
183 quat.getAxes(m[0],m[1],m[2]);
184 }
185
186
187 // Assign from scalar
operator =(FXdouble s)188 FXMat3d& FXMat3d::operator=(FXdouble s){
189 #if defined(FOX_HAS_AVX)
190 _mm256_storeu_pd(&m[0][0],_mm256_set1_pd(s));
191 _mm256_storeu_pd(&m[1][1],_mm256_set1_pd(s));
192 _mm256_storeu_sd(&m[1][1],_mm256_set1_pd(s));
193 #elif defined(FOX_HAS_SSE2)
194 _mm_storeu_pd(&m[0][0],_mm_set1_pd(s));
195 _mm_storeu_pd(&m[0][2],_mm_set1_pd(s));
196 _mm_storeu_pd(&m[1][1],_mm_set1_pd(s));
197 _mm_storeu_pd(&m[2][0],_mm_set1_pd(s));
198 _mm_storeu_sd(&m[2][2],_mm_set1_pd(s));
199 #else
200 m[0][0]=s; m[0][1]=s; m[0][2]=s;
201 m[1][0]=s; m[1][1]=s; m[1][2]=s;
202 m[2][0]=s; m[2][1]=s; m[2][2]=s;
203 #endif
204 return *this;
205 }
206
207
208 // Assign from 2x2 rotation and scale matrix
operator =(const FXMat2d & s)209 FXMat3d& FXMat3d::operator=(const FXMat2d& s){
210 m[0][0]=s[0][0]; m[0][1]=s[0][1]; m[0][2]=0.0;
211 m[1][0]=s[1][0]; m[1][1]=s[1][1]; m[1][2]=0.0;
212 m[2][0]=0.0; m[2][1]=0.0; m[2][2]=1.0;
213 return *this;
214 }
215
216
217 // Assignment operator
operator =(const FXMat3d & s)218 FXMat3d& FXMat3d::operator=(const FXMat3d& s){
219 #if defined(FOX_HAS_AVX)
220 _mm256_storeu_pd(&m[0][0],_mm256_loadu_pd(&s[0][0]));
221 _mm256_storeu_pd(&m[1][1],_mm256_loadu_pd(&s[1][1]));
222 _mm256_storeu_sd(&m[2][2],_mm256_loadu_sd(&s[2][2]));
223 #elif defined(FOX_HAS_SSE2)
224 _mm_storeu_pd(&m[0][0],_mm_loadu_pd(&s[0][0]));
225 _mm_storeu_pd(&m[0][2],_mm_loadu_pd(&s[0][2]));
226 _mm_storeu_pd(&m[1][1],_mm_loadu_pd(&s[1][1]));
227 _mm_storeu_pd(&m[2][0],_mm_loadu_pd(&s[2][0]));
228 _mm_storeu_sd(&m[2][2],_mm_loadu_sd(&s[2][2]));
229 #else
230 m[0][0]=s[0][0]; m[0][1]=s[0][1]; m[0][2]=s[0][2];
231 m[1][0]=s[1][0]; m[1][1]=s[1][1]; m[1][2]=s[1][2];
232 m[2][0]=s[2][0]; m[2][1]=s[2][1]; m[2][2]=s[2][2];
233 #endif
234 return *this;
235 }
236
237
238 // Assign from rotation and scaling part of 4x4 matrix
operator =(const FXMat4d & s)239 FXMat3d& FXMat3d::operator=(const FXMat4d& s){
240 #if defined(FOX_HAS_SSE2)
241 _mm_storeu_pd(&m[0][0],_mm_loadu_pd(&s[0][0])); _mm_store_sd(&m[0][2],_mm_load_sd(&s[0][2]));
242 _mm_storeu_pd(&m[1][0],_mm_loadu_pd(&s[1][0])); _mm_store_sd(&m[1][2],_mm_load_sd(&s[1][2]));
243 _mm_storeu_pd(&m[2][0],_mm_loadu_pd(&s[2][0])); _mm_store_sd(&m[2][2],_mm_load_sd(&s[2][2]));
244 #else
245 m[0][0]=s[0][0]; m[0][1]=s[0][1]; m[0][2]=s[0][2];
246 m[1][0]=s[1][0]; m[1][1]=s[1][1]; m[1][2]=s[1][2];
247 m[2][0]=s[2][0]; m[2][1]=s[2][1]; m[2][2]=s[2][2];
248 #endif
249 return *this;
250 }
251
252
253 // Assignment from quaternion
operator =(const FXQuatd & quat)254 FXMat3d& FXMat3d::operator=(const FXQuatd& quat){
255 quat.getAxes(m[0],m[1],m[2]);
256 return *this;
257 }
258
259
260 // Assignment from array
operator =(const FXdouble s[])261 FXMat3d& FXMat3d::operator=(const FXdouble s[]){
262 #if defined(FOX_HAS_AVX)
263 _mm256_storeu_pd(&m[0][0],_mm256_loadu_pd(&s[0]));
264 _mm256_storeu_pd(&m[1][1],_mm256_loadu_pd(&s[4]));
265 _mm256_storeu_sd(&m[2][2],_mm256_loadu_sd(&s[8]));
266 #elif defined(FOX_HAS_SSE2)
267 _mm_storeu_pd(&m[0][0],_mm_loadu_pd(&s[0]));
268 _mm_storeu_pd(&m[0][2],_mm_loadu_pd(&s[2]));
269 _mm_storeu_pd(&m[1][1],_mm_loadu_pd(&s[4]));
270 _mm_storeu_pd(&m[2][0],_mm_loadu_pd(&s[6]));
271 _mm_storeu_sd(&m[2][2],_mm_loadu_sd(&s[8]));
272 #else
273 m[0][0]=s[0]; m[0][1]=s[1]; m[0][2]=s[2];
274 m[1][0]=s[3]; m[1][1]=s[4]; m[1][2]=s[5];
275 m[2][0]=s[6]; m[2][1]=s[7]; m[2][2]=s[8];
276 #endif
277 return *this;
278 }
279
280
281 // Set value from scalar
set(FXdouble s)282 FXMat3d& FXMat3d::set(FXdouble s){
283 #if defined(FOX_HAS_AVX)
284 _mm256_storeu_pd(&m[0][0],_mm256_set1_pd(s));
285 _mm256_storeu_pd(&m[1][1],_mm256_set1_pd(s));
286 _mm256_storeu_sd(&m[1][1],_mm256_set1_pd(s));
287 #elif defined(FOX_HAS_SSE2)
288 _mm_storeu_pd(&m[0][0],_mm_set1_pd(s));
289 _mm_storeu_pd(&m[0][2],_mm_set1_pd(s));
290 _mm_storeu_pd(&m[1][1],_mm_set1_pd(s));
291 _mm_storeu_pd(&m[2][0],_mm_set1_pd(s));
292 _mm_storeu_sd(&m[2][2],_mm_set1_pd(s));
293 #else
294 m[0][0]=s; m[0][1]=s; m[0][2]=s;
295 m[1][0]=s; m[1][1]=s; m[1][2]=s;
296 m[2][0]=s; m[2][1]=s; m[2][2]=s;
297 #endif
298 return *this;
299 }
300
301
302 // Set value from 2x2 rotation and scale matrix
set(const FXMat2d & s)303 FXMat3d& FXMat3d::set(const FXMat2d& s){
304 m[0][0]=s[0][0]; m[0][1]=s[0][1]; m[0][2]=0.0;
305 m[1][0]=s[1][0]; m[1][1]=s[1][1]; m[1][2]=0.0;
306 m[2][0]=0.0; m[2][1]=0.0; m[2][2]=1.0;
307 return *this;
308 }
309
310
311 // Set value from another matrix
set(const FXMat3d & s)312 FXMat3d& FXMat3d::set(const FXMat3d& s){
313 #if defined(FOX_HAS_AVX)
314 _mm256_storeu_pd(&m[0][0],_mm256_loadu_pd(&s[0][0]));
315 _mm256_storeu_pd(&m[1][1],_mm256_loadu_pd(&s[1][1]));
316 _mm256_storeu_sd(&m[2][2],_mm256_loadu_sd(&s[2][2]));
317 #elif defined(FOX_HAS_SSE2)
318 _mm_storeu_pd(&m[0][0],_mm_loadu_pd(&s[0][0]));
319 _mm_storeu_pd(&m[0][2],_mm_loadu_pd(&s[0][2]));
320 _mm_storeu_pd(&m[1][1],_mm_loadu_pd(&s[1][1]));
321 _mm_storeu_pd(&m[2][0],_mm_loadu_pd(&s[2][0]));
322 _mm_storeu_sd(&m[2][2],_mm_loadu_sd(&s[2][2]));
323 #else
324 m[0][0]=s[0][0]; m[0][1]=s[0][1]; m[0][2]=s[0][2];
325 m[1][0]=s[1][0]; m[1][1]=s[1][1]; m[1][2]=s[1][2];
326 m[2][0]=s[2][0]; m[2][1]=s[2][1]; m[2][2]=s[2][2];
327 #endif
328 return *this;
329 }
330
331
332 // Set from rotation and scaling part of 4x4 matrix
set(const FXMat4d & s)333 FXMat3d& FXMat3d::set(const FXMat4d& s){
334 #if defined(FOX_HAS_SSE2)
335 _mm_storeu_pd(&m[0][0],_mm_loadu_pd(&s[0][0])); _mm_store_sd(&m[0][2],_mm_load_sd(&s[0][2]));
336 _mm_storeu_pd(&m[1][0],_mm_loadu_pd(&s[1][0])); _mm_store_sd(&m[1][2],_mm_load_sd(&s[1][2]));
337 _mm_storeu_pd(&m[2][0],_mm_loadu_pd(&s[2][0])); _mm_store_sd(&m[2][2],_mm_load_sd(&s[2][2]));
338 #else
339 m[0][0]=s[0][0]; m[0][1]=s[0][1]; m[0][2]=s[0][2];
340 m[1][0]=s[1][0]; m[1][1]=s[1][1]; m[1][2]=s[1][2];
341 m[2][0]=s[2][0]; m[2][1]=s[2][1]; m[2][2]=s[2][2];
342 #endif
343 return *this;
344 }
345
346
347 // Set value from array
set(const FXdouble s[])348 FXMat3d& FXMat3d::set(const FXdouble s[]){
349 #if defined(FOX_HAS_AVX)
350 _mm256_storeu_pd(&m[0][0],_mm256_loadu_pd(&s[0]));
351 _mm256_storeu_pd(&m[1][1],_mm256_loadu_pd(&s[4]));
352 _mm256_storeu_sd(&m[2][2],_mm256_loadu_sd(&s[8]));
353 #elif defined(FOX_HAS_SSE2)
354 _mm_storeu_pd(&m[0][0],_mm_loadu_pd(&s[0]));
355 _mm_storeu_pd(&m[0][2],_mm_loadu_pd(&s[2]));
356 _mm_storeu_pd(&m[1][1],_mm_loadu_pd(&s[4]));
357 _mm_storeu_pd(&m[2][0],_mm_loadu_pd(&s[6]));
358 _mm_storeu_sd(&m[2][2],_mm_loadu_sd(&s[8]));
359 #else
360 m[0][0]=s[0]; m[0][1]=s[1]; m[0][2]=s[2];
361 m[1][0]=s[3]; m[1][1]=s[4]; m[1][2]=s[5];
362 m[2][0]=s[6]; m[2][1]=s[7]; m[2][2]=s[8];
363 #endif
364 return *this;
365 }
366
367
368 // Set diagonal matrix
set(FXdouble a,FXdouble b,FXdouble c)369 FXMat3d& FXMat3d::set(FXdouble a,FXdouble b,FXdouble c){
370 m[0][0]=a; m[0][1]=0.0; m[0][2]=0.0;
371 m[1][0]=0.0; m[1][1]=b; m[1][2]=0.0;
372 m[2][0]=0.0; m[2][1]=0.0; m[2][2]=c;
373 return *this;
374 }
375
376
377 // Set value from components
set(FXdouble a00,FXdouble a01,FXdouble a02,FXdouble a10,FXdouble a11,FXdouble a12,FXdouble a20,FXdouble a21,FXdouble a22)378 FXMat3d& FXMat3d::set(FXdouble a00,FXdouble a01,FXdouble a02,FXdouble a10,FXdouble a11,FXdouble a12,FXdouble a20,FXdouble a21,FXdouble a22){
379 m[0][0]=a00; m[0][1]=a01; m[0][2]=a02;
380 m[1][0]=a10; m[1][1]=a11; m[1][2]=a12;
381 m[2][0]=a20; m[2][1]=a21; m[2][2]=a22;
382 return *this;
383 }
384
385
386 // Set value from three vectors
set(const FXVec3d & a,const FXVec3d & b,const FXVec3d & c)387 FXMat3d& FXMat3d::set(const FXVec3d& a,const FXVec3d& b,const FXVec3d& c){
388 #if defined(FOX_HAS_SSE2)
389 _mm_storeu_pd(&m[0][0],_mm_loadu_pd(&a[0])); _mm_store_sd(&m[0][2],_mm_load_sd(&a[2]));
390 _mm_storeu_pd(&m[1][0],_mm_loadu_pd(&b[0])); _mm_store_sd(&m[1][2],_mm_load_sd(&b[2]));
391 _mm_storeu_pd(&m[2][0],_mm_loadu_pd(&c[0])); _mm_store_sd(&m[2][2],_mm_load_sd(&c[2]));
392 #else
393 m[0]=a;
394 m[1]=b;
395 m[2]=c;
396 #endif
397 return *this;
398 }
399
400
401 // Set value from quaternion
set(const FXQuatd & quat)402 FXMat3d& FXMat3d::set(const FXQuatd& quat){
403 quat.getAxes(m[0],m[1],m[2]);
404 return *this;
405 }
406
407
408 // Add matrices
operator +=(const FXMat3d & w)409 FXMat3d& FXMat3d::operator+=(const FXMat3d& w){
410 #if defined(FOX_HAS_AVX)
411 _mm256_storeu_pd(&m[0][0],_mm256_add_pd(_mm256_loadu_pd(&m[0][0]),_mm256_loadu_pd(&w[0][0])));
412 _mm256_storeu_pd(&m[1][1],_mm256_add_pd(_mm256_loadu_pd(&m[1][1]),_mm256_loadu_pd(&w[1][1])));
413 _mm256_storeu_sd(&m[2][2],_mm256_add_sd(_mm256_loadu_sd(&m[2][2]),_mm256_loadu_sd(&w[2][2])));
414 #elif defined(FOX_HAS_SSE2)
415 _mm_storeu_pd(&m[0][0],_mm_add_pd(_mm_loadu_pd(&m[0][0]),_mm_loadu_pd(&w[0][0])));
416 _mm_storeu_pd(&m[0][2],_mm_add_pd(_mm_loadu_pd(&m[0][2]),_mm_loadu_pd(&w[0][2])));
417 _mm_storeu_pd(&m[1][1],_mm_add_pd(_mm_loadu_pd(&m[1][1]),_mm_loadu_pd(&w[1][1])));
418 _mm_storeu_pd(&m[2][0],_mm_add_pd(_mm_loadu_pd(&m[2][0]),_mm_loadu_pd(&w[2][0])));
419 _mm_storeu_sd(&m[2][2],_mm_add_sd(_mm_loadu_sd(&m[2][2]),_mm_loadu_sd(&w[2][2])));
420 #else
421 m[0][0]+=w[0][0]; m[0][1]+=w[0][1]; m[0][2]+=w[0][2];
422 m[1][0]+=w[1][0]; m[1][1]+=w[1][1]; m[1][2]+=w[1][2];
423 m[2][0]+=w[2][0]; m[2][1]+=w[2][1]; m[2][2]+=w[2][2];
424 #endif
425 return *this;
426 }
427
428
429 // Subtract matrices
operator -=(const FXMat3d & w)430 FXMat3d& FXMat3d::operator-=(const FXMat3d& w){
431 #if defined(FOX_HAS_AVX)
432 _mm256_storeu_pd(&m[0][0],_mm256_sub_pd(_mm256_loadu_pd(&m[0][0]),_mm256_loadu_pd(&w[0][0])));
433 _mm256_storeu_pd(&m[1][1],_mm256_sub_pd(_mm256_loadu_pd(&m[1][1]),_mm256_loadu_pd(&w[1][1])));
434 _mm256_storeu_sd(&m[2][2],_mm256_sub_sd(_mm256_loadu_sd(&m[2][2]),_mm256_loadu_sd(&w[2][2])));
435 #elif defined(FOX_HAS_SSE2)
436 _mm_storeu_pd(&m[0][0],_mm_sub_pd(_mm_loadu_pd(&m[0][0]),_mm_loadu_pd(&w[0][0])));
437 _mm_storeu_pd(&m[0][2],_mm_sub_pd(_mm_loadu_pd(&m[0][2]),_mm_loadu_pd(&w[0][2])));
438 _mm_storeu_pd(&m[1][1],_mm_sub_pd(_mm_loadu_pd(&m[1][1]),_mm_loadu_pd(&w[1][1])));
439 _mm_storeu_pd(&m[2][0],_mm_sub_pd(_mm_loadu_pd(&m[2][0]),_mm_loadu_pd(&w[2][0])));
440 _mm_storeu_sd(&m[2][2],_mm_sub_sd(_mm_loadu_sd(&m[2][2]),_mm_loadu_sd(&w[2][2])));
441 #else
442 m[0][0]-=w[0][0]; m[0][1]-=w[0][1]; m[0][2]-=w[0][2];
443 m[1][0]-=w[1][0]; m[1][1]-=w[1][1]; m[1][2]-=w[1][2];
444 m[2][0]-=w[2][0]; m[2][1]-=w[2][1]; m[2][2]-=w[2][2];
445 #endif
446 return *this;
447 }
448
449
450 // Multiply matrix by scalar
operator *=(FXdouble s)451 FXMat3d& FXMat3d::operator*=(FXdouble s){
452 #if defined(FOX_HAS_AVX)
453 _mm256_storeu_pd(&m[0][0],_mm256_mul_pd(_mm256_loadu_pd(&m[0][0]),_mm256_set1_pd(s)));
454 _mm256_storeu_pd(&m[1][1],_mm256_mul_pd(_mm256_loadu_pd(&m[1][1]),_mm256_set1_pd(s)));
455 _mm256_storeu_sd(&m[2][2],_mm256_mul_sd(_mm256_loadu_sd(&m[2][2]),_mm256_set1_pd(s)));
456 #elif defined(FOX_HAS_SSE2)
457 _mm_storeu_pd(&m[0][0],_mm_mul_pd(_mm_loadu_pd(&m[0][0]),_mm_set1_pd(s)));
458 _mm_storeu_pd(&m[0][2],_mm_mul_pd(_mm_loadu_pd(&m[0][2]),_mm_set1_pd(s)));
459 _mm_storeu_pd(&m[1][1],_mm_mul_pd(_mm_loadu_pd(&m[1][1]),_mm_set1_pd(s)));
460 _mm_storeu_pd(&m[2][0],_mm_mul_pd(_mm_loadu_pd(&m[2][0]),_mm_set1_pd(s)));
461 _mm_storeu_sd(&m[2][2],_mm_mul_sd(_mm_loadu_sd(&m[2][2]),_mm_set1_pd(s)));
462 #else
463 m[0][0]*=s; m[0][1]*=s; m[0][2]*=s;
464 m[1][0]*=s; m[1][1]*=s; m[1][2]*=s;
465 m[2][0]*=s; m[2][1]*=s; m[2][2]*=s;
466 #endif
467 return *this;
468 }
469
470
471 // Multiply matrix by matrix
operator *=(const FXMat3d & s)472 FXMat3d& FXMat3d::operator*=(const FXMat3d& s){
473 #if defined(FOX_HAS_AVX)
474 __m256d b0=_mm256_maskload_pd(s[0],MMM);
475 __m256d b1=_mm256_maskload_pd(s[1],MMM);
476 __m256d b2=_mm256_maskload_pd(s[2],MMM);
477 __m256d xx,yy,zz;
478 xx=_mm256_set1_pd(m[0][0]);
479 yy=_mm256_set1_pd(m[0][1]);
480 zz=_mm256_set1_pd(m[0][2]);
481 _mm256_maskstore_pd(m[0],MMM,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(b0,xx),_mm256_mul_pd(b1,yy)),_mm256_mul_pd(b2,zz)));
482 xx=_mm256_set1_pd(m[1][0]);
483 yy=_mm256_set1_pd(m[1][1]);
484 zz=_mm256_set1_pd(m[1][2]);
485 _mm256_maskstore_pd(m[1],MMM,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(b0,xx),_mm256_mul_pd(b1,yy)),_mm256_mul_pd(b2,zz)));
486 xx=_mm256_set1_pd(m[2][0]);
487 yy=_mm256_set1_pd(m[2][1]);
488 zz=_mm256_set1_pd(m[2][2]);
489 _mm256_maskstore_pd(m[2],MMM,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(b0,xx),_mm256_mul_pd(b1,yy)),_mm256_mul_pd(b2,zz)));
490 return *this;
491 #elif defined(FOX_HAS_SSE2)
492 __m128d b00=_mm_loadu_pd(&s[0][0]);
493 __m128d b02=_mm_load_sd(&s[0][2]);
494 __m128d b10=_mm_loadu_pd(&s[1][0]);
495 __m128d b12=_mm_load_sd(&s[1][2]);
496 __m128d b20=_mm_loadu_pd(&s[2][0]);
497 __m128d b22=_mm_load_sd(&s[2][2]);
498 __m128d xx,yy,zz;
499 xx=_mm_set1_pd(m[0][0]);
500 yy=_mm_set1_pd(m[0][1]);
501 zz=_mm_set1_pd(m[0][2]);
502 _mm_storeu_pd(&m[0][0],_mm_add_pd(_mm_add_pd(_mm_mul_pd(b00,xx),_mm_mul_pd(b10,yy)),_mm_mul_pd(b20,zz)));
503 _mm_store_sd(&m[0][2],_mm_add_sd(_mm_add_sd(_mm_mul_sd(b02,xx),_mm_mul_sd(b12,yy)),_mm_mul_sd(b22,zz)));
504 xx=_mm_set1_pd(m[1][0]);
505 yy=_mm_set1_pd(m[1][1]);
506 zz=_mm_set1_pd(m[1][2]);
507 _mm_storeu_pd(&m[1][0],_mm_add_pd(_mm_add_pd(_mm_mul_pd(b00,xx),_mm_mul_pd(b10,yy)),_mm_mul_pd(b20,zz)));
508 _mm_store_sd(&m[1][2],_mm_add_sd(_mm_add_sd(_mm_mul_sd(b02,xx),_mm_mul_sd(b12,yy)),_mm_mul_sd(b22,zz)));
509 xx=_mm_set1_pd(m[2][0]);
510 yy=_mm_set1_pd(m[2][1]);
511 zz=_mm_set1_pd(m[2][2]);
512 _mm_storeu_pd(&m[2][0],_mm_add_pd(_mm_add_pd(_mm_mul_pd(b00,xx),_mm_mul_pd(b10,yy)),_mm_mul_pd(b20,zz)));
513 _mm_store_sd(&m[2][2],_mm_add_sd(_mm_add_sd(_mm_mul_sd(b02,xx),_mm_mul_sd(b12,yy)),_mm_mul_sd(b22,zz)));
514 return *this;
515 #else
516 FXdouble x,y,z;
517 x=m[0][0]; y=m[0][1]; z=m[0][2];
518 m[0][0]=x*s[0][0]+y*s[1][0]+z*s[2][0];
519 m[0][1]=x*s[0][1]+y*s[1][1]+z*s[2][1];
520 m[0][2]=x*s[0][2]+y*s[1][2]+z*s[2][2];
521 x=m[1][0]; y=m[1][1]; z=m[1][2];
522 m[1][0]=x*s[0][0]+y*s[1][0]+z*s[2][0];
523 m[1][1]=x*s[0][1]+y*s[1][1]+z*s[2][1];
524 m[1][2]=x*s[0][2]+y*s[1][2]+z*s[2][2];
525 x=m[2][0]; y=m[2][1]; z=m[2][2];
526 m[2][0]=x*s[0][0]+y*s[1][0]+z*s[2][0];
527 m[2][1]=x*s[0][1]+y*s[1][1]+z*s[2][1];
528 m[2][2]=x*s[0][2]+y*s[1][2]+z*s[2][2];
529 return *this;
530 #endif
531 }
532
533
534 // Divide matrix by scalar
operator /=(FXdouble s)535 FXMat3d& FXMat3d::operator/=(FXdouble s){
536 #if defined(FOX_HAS_AVX)
537 _mm256_storeu_pd(&m[0][0],_mm256_div_pd(_mm256_loadu_pd(&m[0][0]),_mm256_set1_pd(s)));
538 _mm256_storeu_pd(&m[1][1],_mm256_div_pd(_mm256_loadu_pd(&m[1][1]),_mm256_set1_pd(s)));
539 _mm256_storeu_sd(&m[2][2],_mm256_div_sd(_mm256_loadu_sd(&m[2][2]),_mm256_set1_pd(s)));
540 #elif defined(FOX_HAS_SSE2)
541 _mm_storeu_pd(&m[0][0],_mm_div_pd(_mm_loadu_pd(&m[0][0]),_mm_set1_pd(s)));
542 _mm_storeu_pd(&m[0][2],_mm_div_pd(_mm_loadu_pd(&m[0][2]),_mm_set1_pd(s)));
543 _mm_storeu_pd(&m[1][1],_mm_div_pd(_mm_loadu_pd(&m[1][1]),_mm_set1_pd(s)));
544 _mm_storeu_pd(&m[2][0],_mm_div_pd(_mm_loadu_pd(&m[2][0]),_mm_set1_pd(s)));
545 _mm_storeu_sd(&m[2][2],_mm_div_sd(_mm_loadu_sd(&m[2][2]),_mm_set1_pd(s)));
546 #else
547 m[0][0]/=s; m[0][1]/=s; m[0][2]/=s;
548 m[1][0]/=s; m[1][1]/=s; m[1][2]/=s;
549 m[2][0]/=s; m[2][1]/=s; m[2][2]/=s;
550 #endif
551 return *this;
552 }
553
554
555 // Negate matrix
operator -() const556 FXMat3d FXMat3d::operator-() const {
557 #if defined(FOX_HAS_AVX)
558 FXMat3d r;
559 _mm256_storeu_pd(&r[0][0],_mm256_sub_pd(_mm256_set1_pd(0.0),_mm256_loadu_pd(&m[0][0])));
560 _mm256_storeu_pd(&r[1][1],_mm256_sub_pd(_mm256_set1_pd(0.0),_mm256_loadu_pd(&m[1][1])));
561 _mm256_storeu_sd(&r[2][2],_mm256_sub_sd(_mm256_set1_pd(0.0),_mm256_loadu_sd(&m[2][2])));
562 return r;
563 #elif defined(FOX_HAS_SSE2)
564 FXMat3d r;
565 _mm_storeu_pd(&r[0][0],_mm_sub_pd(_mm_set1_pd(0.0),_mm_loadu_pd(&m[0][0])));
566 _mm_storeu_pd(&r[0][2],_mm_sub_pd(_mm_set1_pd(0.0),_mm_loadu_pd(&m[0][2])));
567 _mm_storeu_pd(&r[1][1],_mm_sub_pd(_mm_set1_pd(0.0),_mm_loadu_pd(&m[1][1])));
568 _mm_storeu_pd(&r[2][0],_mm_sub_pd(_mm_set1_pd(0.0),_mm_loadu_pd(&m[2][0])));
569 _mm_storeu_sd(&r[2][2],_mm_sub_sd(_mm_set1_pd(0.0),_mm_loadu_sd(&m[2][2])));
570 return r;
571 #else
572 return FXMat3d(-m[0][0],-m[0][1],-m[0][2],
573 -m[1][0],-m[1][1],-m[1][2],
574 -m[2][0],-m[2][1],-m[2][2]);
575 #endif
576 }
577
578
579 // Set to identity matrix
identity()580 FXMat3d& FXMat3d::identity(){
581 #if defined(FOX_HAS_AVX)
582 _mm256_storeu_pd(&m[0][0],_mm256_set_pd(0.0,0.0,0.0,1.0));
583 _mm256_storeu_pd(&m[1][1],_mm256_set_pd(0.0,0.0,0.0,1.0));
584 _mm256_storeu_sd(&m[2][2],_mm256_set_pd(0.0,0.0,0.0,1.0));
585 #elif defined(FOX_HAS_SSE2)
586 _mm_storeu_pd(&m[0][0],_mm_set_pd(0.0,1.0));
587 _mm_storeu_pd(&m[0][2],_mm_set_pd(0.0,0.0));
588 _mm_storeu_pd(&m[1][1],_mm_set_pd(0.0,1.0));
589 _mm_storeu_pd(&m[2][0],_mm_set_pd(0.0,0.0));
590 _mm_storeu_sd(&m[2][2],_mm_set_pd(0.0,1.0));
591 #else
592 m[0][0]=1.0; m[0][1]=0.0; m[0][2]=0.0;
593 m[1][0]=0.0; m[1][1]=1.0; m[1][2]=0.0;
594 m[2][0]=0.0; m[2][1]=0.0; m[2][2]=1.0;
595 #endif
596 return *this;
597 }
598
599
600 // Return true if identity matrix
isIdentity() const601 FXbool FXMat3d::isIdentity() const {
602 return m[0][0]==1.0 && m[0][1]==0.0 && m[0][2]==0.0 &&
603 m[1][0]==0.0 && m[1][1]==1.0 && m[1][2]==0.0 &&
604 m[2][0]==0.0 && m[2][1]==0.0 && m[2][2]==1.0;
605 }
606
607
608 // Rotate using unit quaternion
rot(const FXQuatd & q)609 FXMat3d& FXMat3d::rot(const FXQuatd& q){
610 return *this*=FXMat3d(q);
611 }
612
613
614 // Multiply by rotation c,s about unit axis
rot(const FXVec3d & v,FXdouble c,FXdouble s)615 FXMat3d& FXMat3d::rot(const FXVec3d& v,FXdouble c,FXdouble s){
616 FXdouble xx=v.x*v.x;
617 FXdouble yy=v.y*v.y;
618 FXdouble zz=v.z*v.z;
619 FXdouble xy=v.x*v.y;
620 FXdouble yz=v.y*v.z;
621 FXdouble zx=v.z*v.x;
622 FXdouble xs=v.x*s;
623 FXdouble ys=v.y*s;
624 FXdouble zs=v.z*s;
625 FXdouble t=1.0-c;
626 return *this*=FXMat3d(t*xx+c,t*xy+zs,t*zx-ys,t*xy-zs,t*yy+c,t*yz+xs,t*zx+ys,t*yz-xs,t*zz+c);
627 }
628
629
630 // Multiply by rotation of phi about unit axis
rot(const FXVec3d & v,FXdouble phi)631 FXMat3d& FXMat3d::rot(const FXVec3d& v,FXdouble phi){
632 return rot(v,Math::cos(phi),Math::sin(phi));
633 }
634
635
636 // Rotate about x-axis
xrot(FXdouble c,FXdouble s)637 FXMat3d& FXMat3d::xrot(FXdouble c,FXdouble s){
638 FXdouble u,v;
639 u=m[1][0]; v=m[2][0]; m[1][0]=c*u+s*v; m[2][0]=c*v-s*u;
640 u=m[1][1]; v=m[2][1]; m[1][1]=c*u+s*v; m[2][1]=c*v-s*u;
641 u=m[1][2]; v=m[2][2]; m[1][2]=c*u+s*v; m[2][2]=c*v-s*u;
642 return *this;
643 }
644
645
646 // Rotate by angle about x-axis
xrot(FXdouble phi)647 FXMat3d& FXMat3d::xrot(FXdouble phi){
648 return xrot(Math::cos(phi),Math::sin(phi));
649 }
650
651
652 // Rotate about y-axis
yrot(FXdouble c,FXdouble s)653 FXMat3d& FXMat3d::yrot(FXdouble c,FXdouble s){
654 FXdouble u,v;
655 u=m[0][0]; v=m[2][0]; m[0][0]=c*u-s*v; m[2][0]=c*v+s*u;
656 u=m[0][1]; v=m[2][1]; m[0][1]=c*u-s*v; m[2][1]=c*v+s*u;
657 u=m[0][2]; v=m[2][2]; m[0][2]=c*u-s*v; m[2][2]=c*v+s*u;
658 return *this;
659 }
660
661
662 // Rotate by angle about y-axis
yrot(FXdouble phi)663 FXMat3d& FXMat3d::yrot(FXdouble phi){
664 return yrot(Math::cos(phi),Math::sin(phi));
665 }
666
667
668 // Rotate about z-axis
zrot(FXdouble c,FXdouble s)669 FXMat3d& FXMat3d::zrot(FXdouble c,FXdouble s){
670 FXdouble u,v;
671 u=m[0][0]; v=m[1][0]; m[0][0]=c*u+s*v; m[1][0]=c*v-s*u;
672 u=m[0][1]; v=m[1][1]; m[0][1]=c*u+s*v; m[1][1]=c*v-s*u;
673 u=m[0][2]; v=m[1][2]; m[0][2]=c*u+s*v; m[1][2]=c*v-s*u;
674 return *this;
675 }
676
677
678 // Rotate by angle about z-axis
zrot(FXdouble phi)679 FXMat3d& FXMat3d::zrot(FXdouble phi){
680 return zrot(Math::cos(phi),Math::sin(phi));
681 }
682
683
684 // Scale unqual
scale(FXdouble sx,FXdouble sy,FXdouble sz)685 FXMat3d& FXMat3d::scale(FXdouble sx,FXdouble sy,FXdouble sz){
686 m[0][0]*=sx; m[0][1]*=sx; m[0][2]*=sx;
687 m[1][0]*=sy; m[1][1]*=sy; m[1][2]*=sy;
688 m[2][0]*=sz; m[2][1]*=sz; m[2][2]*=sz;
689 return *this;
690 }
691
692
693 // Scale unqual
scale(const FXVec3d & v)694 FXMat3d& FXMat3d::scale(const FXVec3d& v){
695 return scale(v[0],v[1],v[2]);
696 }
697
698
699 // Scale uniform
scale(FXdouble s)700 FXMat3d& FXMat3d::scale(FXdouble s){
701 return scale(s,s,s);
702 }
703
704
705 // Calculate determinant
det() const706 FXdouble FXMat3d::det() const {
707 return m[0][0]*(m[1][1]*m[2][2]-m[2][1]*m[1][2])+
708 m[0][1]*(m[2][0]*m[1][2]-m[1][0]*m[2][2])+
709 m[0][2]*(m[1][0]*m[2][1]-m[2][0]*m[1][1]);
710 }
711
712
713 // Transpose matrix
transpose() const714 FXMat3d FXMat3d::transpose() const {
715 return FXMat3d(m[0][0],m[1][0],m[2][0],
716 m[0][1],m[1][1],m[2][1],
717 m[0][2],m[1][2],m[2][2]);
718 }
719
720
721 // Invert matrix
invert() const722 FXMat3d FXMat3d::invert() const {
723 FXdouble dd;
724 FXMat3d res;
725 res[0][0]=m[1][1]*m[2][2]-m[1][2]*m[2][1];
726 res[0][1]=m[0][2]*m[2][1]-m[0][1]*m[2][2];
727 res[0][2]=m[0][1]*m[1][2]-m[0][2]*m[1][1];
728 res[1][0]=m[1][2]*m[2][0]-m[1][0]*m[2][2];
729 res[1][1]=m[0][0]*m[2][2]-m[0][2]*m[2][0];
730 res[1][2]=m[0][2]*m[1][0]-m[0][0]*m[1][2];
731 res[2][0]=m[1][0]*m[2][1]-m[1][1]*m[2][0];
732 res[2][1]=m[0][1]*m[2][0]-m[0][0]*m[2][1];
733 res[2][2]=m[0][0]*m[1][1]-m[0][1]*m[1][0];
734 dd=m[0][0]*res[0][0]+m[0][1]*res[1][0]+m[0][2]*res[2][0];
735 FXASSERT(dd!=0.0);
736 dd=1.0/dd;
737 res[0][0]*=dd;
738 res[0][1]*=dd;
739 res[0][2]*=dd;
740 res[1][0]*=dd;
741 res[1][1]*=dd;
742 res[1][2]*=dd;
743 res[2][0]*=dd;
744 res[2][1]*=dd;
745 res[2][2]*=dd;
746 return res;
747 }
748
749
750 // Orthogonalize matrix
751 // Uses Gram-Schmidt orthogonalization on a row-by-row basis
orthogonalize(const FXMat3d & m)752 FXMat3d orthogonalize(const FXMat3d& m){
753 FXMat3d result(m);
754 result[0]/=result[0].length();
755 result[1]-=result[0]*(result[1]*result[0]);
756 result[1]/=result[1].length();
757 result[2]-=result[0]*(result[2]*result[0]);
758 result[2]-=result[1]*(result[2]*result[1]);
759 result[2]/=result[2].length();
760 return result;
761 }
762
763
764 // Matrix times vector
operator *(const FXMat3d & m,const FXVec2d & v)765 FXVec2d operator*(const FXMat3d& m,const FXVec2d& v){
766 #if defined(FOX_HAS_SSE3)
767 __m128d vv=_mm_loadu_pd(&v[0]);
768 __m128d r00=_mm_mul_pd(_mm_loadu_pd(&m[0][0]),vv); // m01*v1 m00*v0
769 __m128d r10=_mm_mul_pd(_mm_loadu_pd(&m[1][0]),vv); // m11*v1 m10*v0
770 __m128d r02=_mm_load_sd(&m[0][2]); // 0 m02
771 __m128d r12=_mm_load_sd(&m[1][2]); // 0 m12
772 FXVec2d r;
773 r00=_mm_hadd_pd(r00,r02); // m02 m01*v1+m00*v0
774 r10=_mm_hadd_pd(r10,r12); // m12 m11*v1+m10*v0
775 r00=_mm_hadd_pd(r00,r10); // m12+m11*v1+m10*v0 m02+m01*v1+m00*v0
776 _mm_storeu_pd(&r[0],r00);
777 return r;
778 #else
779 return FXVec2d(m[0][0]*v[0]+m[0][1]*v[1]+m[0][2], m[1][0]*v[0]+m[1][1]*v[1]+m[1][2]);
780 #endif
781 }
782
783
784 // Matrix times vector
operator *(const FXMat3d & m,const FXVec3d & v)785 FXVec3d operator*(const FXMat3d& m,const FXVec3d& v){
786 #if defined(FOX_HAS_SSE3)
787 __m128d v0=_mm_loadu_pd(&v[0]);
788 __m128d v1=_mm_load_sd(&v[2]);
789 __m128d r00=_mm_mul_pd(_mm_loadu_pd(&m[0][0]),v0); // m01*v1 m00*v0
790 __m128d r10=_mm_mul_pd(_mm_loadu_pd(&m[1][0]),v0); // m11*v1 m10*v0
791 __m128d r20=_mm_mul_pd(_mm_loadu_pd(&m[2][0]),v0); // m21*v1 m20*v0
792 __m128d r02=_mm_mul_pd(_mm_load_sd(&m[0][2]),v1); // 0 m02*v2
793 __m128d r12=_mm_mul_pd(_mm_load_sd(&m[1][2]),v1); // 0 m12*v2
794 __m128d r22=_mm_mul_pd(_mm_load_sd(&m[2][2]),v1); // 0 m22*v2
795 FXVec3d r;
796 r00=_mm_hadd_pd(r00,r02); // m02*v2 m01*v1+m00*v0
797 r10=_mm_hadd_pd(r10,r12); // m12*v2 m11*v1+m10*v0
798 r20=_mm_hadd_pd(r20,r22); // m22*v2 m21*v1+m20*v0
799 r00=_mm_hadd_pd(r00,r10); // m12*v2+m11*v1+m10*v0 m02*v2+m01*v1+m00*v0
800 r20=_mm_hadd_pd(r20,r20); // m22*v2+m21*v1+m20*v0 m22*v2+m21*v1+m20*v0
801 _mm_storeu_pd(&r[0],r00); _mm_store_sd(&r[2],r20);
802 return r;
803 #else
804 return FXVec3d(m[0][0]*v[0]+m[0][1]*v[1]+m[0][2]*v[2], m[1][0]*v[0]+m[1][1]*v[1]+m[1][2]*v[2], m[2][0]*v[0]+m[2][1]*v[1]+m[2][2]*v[2]);
805 #endif
806 }
807
808
809 // Vector times matrix
operator *(const FXVec2d & v,const FXMat3d & m)810 FXVec2d operator*(const FXVec2d& v,const FXMat3d& m){
811 #if defined(FOX_HAS_SSE2)
812 __m128d m00=_mm_loadu_pd(&m[0][0]);
813 __m128d m10=_mm_loadu_pd(&m[1][0]);
814 __m128d m20=_mm_loadu_pd(&m[2][0]);
815 __m128d v0=_mm_set1_pd(v[0]);
816 __m128d v1=_mm_set1_pd(v[1]);
817 FXVec2d r;
818 _mm_storeu_pd(&r[0],_mm_add_pd(_mm_add_pd(_mm_mul_pd(m00,v0),_mm_mul_pd(m10,v1)),m20));
819 return r;
820 #else
821 return FXVec2d(v[0]*m[0][0]+v[1]*m[1][0]+m[2][0],v[0]*m[0][1]+v[1]*m[1][1]+m[2][1]);
822 #endif
823 }
824
825
826 // Vector times matrix
operator *(const FXVec3d & v,const FXMat3d & m)827 FXVec3d operator*(const FXVec3d& v,const FXMat3d& m){
828 #if defined(FOX_HAS_AVX)
829 __m256d m0=_mm256_maskload_pd(m[0],MMM);
830 __m256d m1=_mm256_maskload_pd(m[1],MMM);
831 __m256d m2=_mm256_maskload_pd(m[2],MMM);
832 __m256d v0=_mm256_set1_pd(v[0]);
833 __m256d v1=_mm256_set1_pd(v[1]);
834 __m256d v2=_mm256_set1_pd(v[2]);
835 FXVec3d r;
836 _mm256_maskstore_pd(&r[0],MMM,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(v0,m0),_mm256_mul_pd(v1,m1)),_mm256_mul_pd(v2,m2)));
837 return r;
838 #elif defined(FOX_HAS_SSE2)
839 __m128d m00=_mm_loadu_pd(&m[0][0]);
840 __m128d m10=_mm_loadu_pd(&m[1][0]);
841 __m128d m20=_mm_loadu_pd(&m[2][0]);
842 __m128d m02=_mm_load_sd(&m[0][2]);
843 __m128d m12=_mm_load_sd(&m[1][2]);
844 __m128d m22=_mm_load_sd(&m[2][2]);
845 __m128d v0=_mm_set1_pd(v[0]);
846 __m128d v1=_mm_set1_pd(v[1]);
847 __m128d v2=_mm_set1_pd(v[2]);
848 FXVec3d r;
849 _mm_storeu_pd(&r[0],_mm_add_pd(_mm_add_pd(_mm_mul_pd(m00,v0),_mm_mul_pd(m10,v1)),_mm_mul_pd(m20,v2)));
850 _mm_store_sd(&r[2],_mm_add_sd(_mm_add_sd(_mm_mul_sd(m02,v0),_mm_mul_sd(m12,v1)),_mm_mul_sd(m22,v2)));
851 return r;
852 #else
853 return FXVec3d(v[0]*m[0][0]+v[1]*m[1][0]+v[2]*m[2][0], v[0]*m[0][1]+v[1]*m[1][1]+v[2]*m[2][1], v[0]*m[0][2]+v[1]*m[1][2]+v[2]*m[2][2]);
854 #endif
855 }
856
857
858 // Matrix and matrix add
operator +(const FXMat3d & a,const FXMat3d & b)859 FXMat3d operator+(const FXMat3d& a,const FXMat3d& b){
860 #if defined(FOX_HAS_AVX)
861 FXMat3d r;
862 _mm256_maskstore_pd(&r[0][0],MMM,_mm256_add_pd(_mm256_maskload_pd(&a[0][0],MMM),_mm256_maskload_pd(&b[0][0],MMM)));
863 _mm256_maskstore_pd(&r[1][0],MMM,_mm256_add_pd(_mm256_maskload_pd(&a[1][0],MMM),_mm256_maskload_pd(&b[1][0],MMM)));
864 _mm256_maskstore_pd(&r[2][0],MMM,_mm256_add_pd(_mm256_maskload_pd(&a[2][0],MMM),_mm256_maskload_pd(&b[2][0],MMM)));
865 return r;
866 #elif defined(FOX_HAS_SSE2)
867 FXMat3d r;
868 _mm_storeu_pd(&r[0][0],_mm_add_pd(_mm_loadu_pd(&a[0][0]),_mm_loadu_pd(&b[0][0])));
869 _mm_storeu_pd(&r[0][2],_mm_add_pd(_mm_loadu_pd(&a[0][2]),_mm_loadu_pd(&b[0][2])));
870 _mm_storeu_pd(&r[1][1],_mm_add_pd(_mm_loadu_pd(&a[1][1]),_mm_loadu_pd(&b[1][1])));
871 _mm_storeu_pd(&r[2][0],_mm_add_pd(_mm_loadu_pd(&a[2][0]),_mm_loadu_pd(&b[2][0])));
872 _mm_storeu_sd(&r[2][2],_mm_add_sd(_mm_loadu_sd(&a[2][2]),_mm_loadu_sd(&b[2][2])));
873 return r;
874 #else
875 return FXMat3d(a[0][0]+b[0][0],a[0][1]+b[0][1],a[0][2]+b[0][2], a[1][0]+b[1][0],a[1][1]+b[1][1],a[1][2]+b[1][2], a[2][0]+b[2][0],a[2][1]+b[2][1],a[2][2]+b[2][2]);
876 #endif
877 }
878
879
880 // Matrix and matrix subtract
operator -(const FXMat3d & a,const FXMat3d & b)881 FXMat3d operator-(const FXMat3d& a,const FXMat3d& b){
882 #if defined(FOX_HAS_AVX)
883 FXMat3d r;
884 _mm256_maskstore_pd(&r[0][0],MMM,_mm256_sub_pd(_mm256_maskload_pd(&a[0][0],MMM),_mm256_maskload_pd(&b[0][0],MMM)));
885 _mm256_maskstore_pd(&r[1][0],MMM,_mm256_sub_pd(_mm256_maskload_pd(&a[1][0],MMM),_mm256_maskload_pd(&b[1][0],MMM)));
886 _mm256_maskstore_pd(&r[2][0],MMM,_mm256_sub_pd(_mm256_maskload_pd(&a[2][0],MMM),_mm256_maskload_pd(&b[2][0],MMM)));
887 return r;
888 #elif defined(FOX_HAS_SSE2)
889 FXMat3d r;
890 _mm_storeu_pd(&r[0][0],_mm_sub_pd(_mm_loadu_pd(&a[0][0]),_mm_loadu_pd(&b[0][0])));
891 _mm_storeu_pd(&r[0][2],_mm_sub_pd(_mm_loadu_pd(&a[0][2]),_mm_loadu_pd(&b[0][2])));
892 _mm_storeu_pd(&r[1][1],_mm_sub_pd(_mm_loadu_pd(&a[1][1]),_mm_loadu_pd(&b[1][1])));
893 _mm_storeu_pd(&r[2][0],_mm_sub_pd(_mm_loadu_pd(&a[2][0]),_mm_loadu_pd(&b[2][0])));
894 _mm_storeu_sd(&r[2][2],_mm_sub_sd(_mm_loadu_sd(&a[2][2]),_mm_loadu_sd(&b[2][2])));
895 return r;
896 #else
897 return FXMat3d(a[0][0]-b[0][0],a[0][1]-b[0][1],a[0][2]-b[0][2], a[1][0]-b[1][0],a[1][1]-b[1][1],a[1][2]-b[1][2], a[2][0]-b[2][0],a[2][1]-b[2][1],a[2][2]-b[2][2]);
898 #endif
899 }
900
901
902 // Matrix and matrix multiply
operator *(const FXMat3d & a,const FXMat3d & b)903 FXMat3d operator*(const FXMat3d& a,const FXMat3d& b){
904 #if defined(FOX_HAS_AVX)
905 __m256d b0=_mm256_maskload_pd(b[0],MMM);
906 __m256d b1=_mm256_maskload_pd(b[0],MMM);
907 __m256d b2=_mm256_maskload_pd(b[0],MMM);
908 __m256d xx,yy,zz;
909 FXMat3d r;
910 xx=_mm256_set1_pd(a[0][0]);
911 yy=_mm256_set1_pd(a[0][1]);
912 zz=_mm256_set1_pd(a[0][2]);
913 _mm256_maskstore_pd(r[0],MMM,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(b0,xx),_mm256_mul_pd(b1,yy)),_mm256_mul_pd(b2,zz)));
914 xx=_mm256_set1_pd(a[1][0]);
915 yy=_mm256_set1_pd(a[1][1]);
916 zz=_mm256_set1_pd(a[1][2]);
917 _mm256_maskstore_pd(r[1],MMM,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(b0,xx),_mm256_mul_pd(b1,yy)),_mm256_mul_pd(b2,zz)));
918 xx=_mm256_set1_pd(a[2][0]);
919 yy=_mm256_set1_pd(a[2][1]);
920 zz=_mm256_set1_pd(a[2][2]);
921 _mm256_maskstore_pd(r[2],MMM,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(b0,xx),_mm256_mul_pd(b1,yy)),_mm256_mul_pd(b2,zz)));
922 return r;
923 #elif defined(FOX_HAS_SSE2)
924 __m128d b00=_mm_loadu_pd(&b[0][0]);
925 __m128d b10=_mm_loadu_pd(&b[1][0]);
926 __m128d b20=_mm_loadu_pd(&b[2][0]);
927 __m128d b02=_mm_load_sd(&b[0][2]);
928 __m128d b12=_mm_load_sd(&b[1][2]);
929 __m128d b22=_mm_load_sd(&b[2][2]);
930 __m128d xx,yy,zz;
931 FXMat3d r;
932 xx=_mm_set1_pd(a[0][0]);
933 yy=_mm_set1_pd(a[0][1]);
934 zz=_mm_set1_pd(a[0][2]);
935 _mm_storeu_pd(&r[0][0],_mm_add_pd(_mm_add_pd(_mm_mul_pd(b00,xx),_mm_mul_pd(b10,yy)),_mm_mul_pd(b20,zz)));
936 _mm_store_sd(&r[0][2],_mm_add_sd(_mm_add_sd(_mm_mul_sd(b02,xx),_mm_mul_sd(b12,yy)),_mm_mul_sd(b22,zz)));
937 xx=_mm_set1_pd(a[1][0]);
938 yy=_mm_set1_pd(a[1][1]);
939 zz=_mm_set1_pd(a[1][2]);
940 _mm_storeu_pd(&r[1][0],_mm_add_pd(_mm_add_pd(_mm_mul_pd(b00,xx),_mm_mul_pd(b10,yy)),_mm_mul_pd(b20,zz)));
941 _mm_store_sd(&r[1][2],_mm_add_sd(_mm_add_sd(_mm_mul_sd(b02,xx),_mm_mul_sd(b12,yy)),_mm_mul_sd(b22,zz)));
942 xx=_mm_set1_pd(a[2][0]);
943 yy=_mm_set1_pd(a[2][1]);
944 zz=_mm_set1_pd(a[2][2]);
945 _mm_storeu_pd(&r[2][0],_mm_add_pd(_mm_add_pd(_mm_mul_pd(b00,xx),_mm_mul_pd(b10,yy)),_mm_mul_pd(b20,zz)));
946 _mm_store_sd(&r[2][2],_mm_add_sd(_mm_add_sd(_mm_mul_sd(b02,xx),_mm_mul_sd(b12,yy)),_mm_mul_sd(b22,zz)));
947 return r;
948 #else
949 FXdouble x,y,z;
950 FXMat3d r;
951 x=a[0][0]; y=a[0][1]; z=a[0][2];
952 r[0][0]=x*b[0][0]+y*b[1][0]+z*b[2][0];
953 r[0][1]=x*b[0][1]+y*b[1][1]+z*b[2][1];
954 r[0][2]=x*b[0][2]+y*b[1][2]+z*b[2][2];
955 x=a[1][0]; y=a[1][1]; z=a[1][2];
956 r[1][0]=x*b[0][0]+y*b[1][0]+z*b[2][0];
957 r[1][1]=x*b[0][1]+y*b[1][1]+z*b[2][1];
958 r[1][2]=x*b[0][2]+y*b[1][2]+z*b[2][2];
959 x=a[2][0]; y=a[2][1]; z=a[2][2];
960 r[2][0]=x*b[0][0]+y*b[1][0]+z*b[2][0];
961 r[2][1]=x*b[0][1]+y*b[1][1]+z*b[2][1];
962 r[2][2]=x*b[0][2]+y*b[1][2]+z*b[2][2];
963 return r;
964 #endif
965 }
966
967
968 // Multiply scalar by matrix
operator *(FXdouble x,const FXMat3d & m)969 FXMat3d operator*(FXdouble x,const FXMat3d& m){
970 #if defined(FOX_HAS_AVX)
971 FXMat3d r;
972 _mm256_storeu_pd(&r[0][0],_mm256_mul_pd(_mm256_set1_pd(x),_mm256_loadu_pd(&m[0][0])));
973 _mm256_storeu_pd(&r[1][1],_mm256_mul_pd(_mm256_set1_pd(x),_mm256_loadu_pd(&m[1][1])));
974 _mm256_storeu_sd(&r[2][2],_mm256_mul_sd(_mm256_set1_pd(x),_mm256_loadu_sd(&m[2][2])));
975 return r;
976 #elif defined(FOX_HAS_SSE2)
977 FXMat3d r;
978 _mm_storeu_pd(&r[0][0],_mm_mul_pd(_mm_set1_pd(x),_mm_loadu_pd(&m[0][0])));
979 _mm_storeu_pd(&r[0][2],_mm_mul_pd(_mm_set1_pd(x),_mm_loadu_pd(&m[0][2])));
980 _mm_storeu_pd(&r[1][1],_mm_mul_pd(_mm_set1_pd(x),_mm_loadu_pd(&m[1][1])));
981 _mm_storeu_pd(&r[2][0],_mm_mul_pd(_mm_set1_pd(x),_mm_loadu_pd(&m[2][0])));
982 _mm_storeu_sd(&r[2][2],_mm_mul_sd(_mm_set1_pd(x),_mm_loadu_sd(&m[2][2])));
983 return r;
984 #else
985 return FXMat3d(x*m[0][0],x*m[0][1],x*m[0][2],
986 x*m[1][0],x*m[1][1],x*m[1][2],
987 x*m[2][0],x*m[2][1],x*m[2][2]);
988 #endif
989 }
990
991
992 // Multiply matrix by scalar
operator *(const FXMat3d & m,FXdouble x)993 FXMat3d operator*(const FXMat3d& m,FXdouble x){
994 #if defined(FOX_HAS_AVX)
995 FXMat3d r;
996 _mm256_storeu_pd(&r[0][0],_mm256_mul_pd(_mm256_loadu_pd(&m[0][0]),_mm256_set1_pd(x)));
997 _mm256_storeu_pd(&r[1][1],_mm256_mul_pd(_mm256_loadu_pd(&m[1][1]),_mm256_set1_pd(x)));
998 _mm256_storeu_sd(&r[2][2],_mm256_mul_sd(_mm256_loadu_sd(&m[2][2]),_mm256_set1_pd(x)));
999 return r;
1000 #elif defined(FOX_HAS_SSE2)
1001 FXMat3d r;
1002 _mm_storeu_pd(&r[0][0],_mm_mul_pd(_mm_loadu_pd(&m[0][0]),_mm_set1_pd(x)));
1003 _mm_storeu_pd(&r[0][2],_mm_mul_pd(_mm_loadu_pd(&m[0][2]),_mm_set1_pd(x)));
1004 _mm_storeu_pd(&r[1][1],_mm_mul_pd(_mm_loadu_pd(&m[1][1]),_mm_set1_pd(x)));
1005 _mm_storeu_pd(&r[2][0],_mm_mul_pd(_mm_loadu_pd(&m[2][0]),_mm_set1_pd(x)));
1006 _mm_storeu_sd(&r[2][2],_mm_mul_sd(_mm_loadu_sd(&m[2][2]),_mm_set1_pd(x)));
1007 return r;
1008 #else
1009 return FXMat3d(m[0][0]*x,m[0][1]*x,m[0][2]*x,
1010 m[1][0]*x,m[1][1]*x,m[1][2]*x,
1011 m[2][0]*x,m[2][1]*x,m[2][2]*x);
1012 #endif
1013 }
1014
1015
1016 // Divide scalar by matrix
operator /(FXdouble x,const FXMat3d & m)1017 FXMat3d operator/(FXdouble x,const FXMat3d& m){
1018 #if defined(FOX_HAS_AVX)
1019 FXMat3d r;
1020 _mm256_storeu_pd(&r[0][0],_mm256_div_pd(_mm256_set1_pd(x),_mm256_loadu_pd(&m[0][0])));
1021 _mm256_storeu_pd(&r[1][1],_mm256_div_pd(_mm256_set1_pd(x),_mm256_loadu_pd(&m[1][1])));
1022 _mm256_storeu_sd(&r[2][2],_mm256_div_sd(_mm256_set1_pd(x),_mm256_loadu_sd(&m[2][2])));
1023 return r;
1024 #elif defined(FOX_HAS_SSE2)
1025 FXMat3d r;
1026 _mm_storeu_pd(&r[0][0],_mm_div_pd(_mm_set1_pd(x),_mm_loadu_pd(&m[0][0])));
1027 _mm_storeu_pd(&r[0][2],_mm_div_pd(_mm_set1_pd(x),_mm_loadu_pd(&m[0][2])));
1028 _mm_storeu_pd(&r[1][1],_mm_div_pd(_mm_set1_pd(x),_mm_loadu_pd(&m[1][1])));
1029 _mm_storeu_pd(&r[2][0],_mm_div_pd(_mm_set1_pd(x),_mm_loadu_pd(&m[2][0])));
1030 _mm_storeu_sd(&r[2][2],_mm_div_sd(_mm_set1_pd(x),_mm_loadu_sd(&m[2][2])));
1031 return r;
1032 #else
1033 return FXMat3d(x/m[0][0],x/m[0][1],x/m[0][2],
1034 x/m[1][0],x/m[1][1],x/m[1][2],
1035 x/m[2][0],x/m[2][1],x/m[2][2]);
1036 #endif
1037 }
1038
1039
1040 // Divide matrix by scalar
operator /(const FXMat3d & m,FXdouble x)1041 FXMat3d operator/(const FXMat3d& m,FXdouble x){
1042 #if defined(FOX_HAS_AVX)
1043 FXMat3d r;
1044 _mm256_storeu_pd(&r[0][0],_mm256_div_pd(_mm256_loadu_pd(&m[0][0]),_mm256_set1_pd(x)));
1045 _mm256_storeu_pd(&r[1][1],_mm256_div_pd(_mm256_loadu_pd(&m[1][1]),_mm256_set1_pd(x)));
1046 _mm256_storeu_sd(&r[2][2],_mm256_div_sd(_mm256_loadu_sd(&m[2][2]),_mm256_set1_pd(x)));
1047 return r;
1048 #elif defined(FOX_HAS_SSE2)
1049 FXMat3d r;
1050 _mm_storeu_pd(&r[0][0],_mm_div_pd(_mm_loadu_pd(&m[0][0]),_mm_set1_pd(x)));
1051 _mm_storeu_pd(&r[0][2],_mm_div_pd(_mm_loadu_pd(&m[0][2]),_mm_set1_pd(x)));
1052 _mm_storeu_pd(&r[1][1],_mm_div_pd(_mm_loadu_pd(&m[1][1]),_mm_set1_pd(x)));
1053 _mm_storeu_pd(&r[2][0],_mm_div_pd(_mm_loadu_pd(&m[2][0]),_mm_set1_pd(x)));
1054 _mm_storeu_sd(&r[2][2],_mm_div_sd(_mm_loadu_sd(&m[2][2]),_mm_set1_pd(x)));
1055 return r;
1056 #else
1057 return FXMat3d(m[0][0]/x,m[0][1]/x,m[0][2]/x,
1058 m[1][0]/x,m[1][1]/x,m[1][2]/x,
1059 m[2][0]/x,m[2][1]/x,m[2][2]/x);
1060 #endif
1061 }
1062
1063
1064 // Matrix and matrix equality
operator ==(const FXMat3d & a,const FXMat3d & b)1065 FXbool operator==(const FXMat3d& a,const FXMat3d& b){
1066 return a[0]==b[0] && a[1]==b[1] && a[2]==b[2];
1067 }
1068
1069
1070 // Matrix and matrix inequality
operator !=(const FXMat3d & a,const FXMat3d & b)1071 FXbool operator!=(const FXMat3d& a,const FXMat3d& b){
1072 return a[0]!=b[0] || a[1]!=b[1] || a[2]!=b[2];
1073 }
1074
1075
1076 // Matrix and scalar equality
operator ==(const FXMat3d & a,FXdouble n)1077 FXbool operator==(const FXMat3d& a,FXdouble n){
1078 return a[0]==n && a[1]==n && a[2]==n;
1079 }
1080
1081
1082 // Matrix and scalar inequality
operator !=(const FXMat3d & a,FXdouble n)1083 FXbool operator!=(const FXMat3d& a,FXdouble n){
1084 return a[0]!=n || a[1]!=n || a[2]!=n;
1085 }
1086
1087
1088 // Scalar and matrix equality
operator ==(FXdouble n,const FXMat3d & a)1089 FXbool operator==(FXdouble n,const FXMat3d& a){
1090 return n==a[0] && n==a[1] && n==a[2];
1091 }
1092
1093
1094 // Scalar and matrix inequality
operator !=(FXdouble n,const FXMat3d & a)1095 FXbool operator!=(FXdouble n,const FXMat3d& a){
1096 return n!=a[0] || n!=a[1] || n!=a[2];
1097 }
1098
1099
1100 // Save to archive
operator <<(FXStream & store,const FXMat3d & m)1101 FXStream& operator<<(FXStream& store,const FXMat3d& m){
1102 store << m[0] << m[1] << m[2];
1103 return store;
1104 }
1105
1106
1107 // Load from archive
operator >>(FXStream & store,FXMat3d & m)1108 FXStream& operator>>(FXStream& store,FXMat3d& m){
1109 store >> m[0] >> m[1] >> m[2];
1110 return store;
1111 }
1112
1113 }
1114