1 // Copyright (c) 2012- PPSSPP Project.
2
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 // GNU General Public License 2.0 for more details.
11
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18 #pragma once
19
#include <cmath>
#include <cstring>

#include "Common/Common.h"
#include "Core/Util/AudioFormat.h"  // for clamp_u8
#include "Common/Math/fast/fast_matrix.h"
25
26 #if defined(_M_SSE)
27 #include <emmintrin.h>
28 #if _M_SSE >= 0x401
29 #include <smmintrin.h>
30 #endif
31 #endif
32
33 namespace Math3D {
34
// Helper for the Vec classes: limits v to the range [low, high].
// The upper bound is tested first, so `high` wins whenever v > high
// (even if the caller passes low > high).
template<typename T>
inline static T VecClamp(const T &v, const T &low, const T &high)
{
	return (v > high) ? high : ((v < low) ? low : v);
}
45
// Two-component vector of T. The components are stored next to each other
// (x first, then y); when _M_SSE is defined they share storage with an SSE
// register that can be accessed through `vec` / `ivec`.
template<typename T>
class Vec2 {
public:
	union {
		struct {
			T x, y;
		};
#if defined(_M_SSE)
		__m128i ivec;
		__m128 vec;
#endif
	};

	// Pointer to the first component; y directly follows x.
	T *AsArray() { return &x; }
	const T *AsArray() const { return &x; }

	// The default constructor leaves the components uninitialized.
	Vec2() {}
	Vec2(const T a[2]) : x(a[0]), y(a[1]) {}
	Vec2(const T &_x, const T &_y) : x(_x), y(_y) {}
#if defined(_M_SSE)
	Vec2(const __m128 &_vec) : vec(_vec) {}
	Vec2(const __m128i &_ivec) : ivec(_ivec) {}
#endif

	// Converts each component to T2 with a C-style cast.
	template<typename T2>
	Vec2<T2> Cast() const {
		return Vec2<T2>((T2)x, (T2)y);
	}

	// Returns a vector with both components set to f.
	static Vec2 AssignToAll(const T &f) {
		return Vec2<T>(f, f);
	}

	// Stores the components into a[0] and a[1].
	void Write(T a[2]) {
		a[0] = x;
		a[1] = y;
	}

	// Component-wise arithmetic with another vector.
	Vec2 operator +(const Vec2 &other) const {
		return Vec2(x + other.x, y + other.y);
	}
	void operator +=(const Vec2 &other) {
		x += other.x;
		y += other.y;
	}
	Vec2 operator -(const Vec2 &other) const {
		return Vec2(x - other.x, y - other.y);
	}
	void operator -=(const Vec2 &other) {
		x -= other.x;
		y -= other.y;
	}
	Vec2 operator -() const {
		return Vec2(-x, -y);
	}
	Vec2 operator *(const Vec2 &other) const {
		return Vec2(x * other.x, y * other.y);
	}

	// Scaling by (or dividing by) a single value of arbitrary type V.
	template<typename V>
	Vec2 operator *(const V &f) const {
		return Vec2(x * f, y * f);
	}
	template<typename V>
	void operator *=(const V &f) {
		x *= f;
		y *= f;
	}
	template<typename V>
	Vec2 operator /(const V &f) const {
		return Vec2(x / f, y / f);
	}
	template<typename V>
	void operator /=(const V &f) {
		const Vec2 quotient = *this / f;
		*this = quotient;
	}

	// Squared length (no square root involved).
	T Length2() const {
		return x * x + y * y;
	}

	// Clamps every component to [l, h] using VecClamp.
	Vec2 Clamp(const T &l, const T &h) const {
		return Vec2(VecClamp(x, l, h), VecClamp(y, l, h));
	}

	// Only implemented for T=float
	float Length() const;
	void SetLength(const float l);
	Vec2 WithLength(const float l) const;
	float Distance2To(Vec2 &other);
	Vec2 Normalized() const;
	float Normalize();  // returns the previous length, which is often useful

	// Indexed access: [0] is x, [1] is y. No bounds checking is performed.
	T &operator [](int i) {
		return AsArray()[i];
	}
	T operator [](const int i) const {
		return AsArray()[i];
	}

	void SetZero() {
		x = 0;
		y = 0;
	}

	// Common aliases: UV (texel coordinates), ST (texture coordinates)
	T &u() { return x; }
	T &v() { return y; }
	T &s() { return x; }
	T &t() { return y; }

	const T &u() const { return x; }
	const T &v() const { return y; }
	const T &s() const { return x; }
	const T &t() const { return y; }

	// Swizzlers: each returns a copy with the two components swapped.
	const Vec2 yx() const { return Vec2(y, x); }
	const Vec2 vu() const { return yx(); }
	const Vec2 ts() const { return yx(); }
};
182
183 template<typename T>
184 class Vec3Packed;
185
186 template<typename T>
187 class Vec3
188 {
189 public:
190 union
191 {
192 struct
193 {
194 T x,y,z;
195 };
196 #if defined(_M_SSE)
197 __m128i ivec;
198 __m128 vec;
199 #endif
200 };
201
AsArray()202 T* AsArray() { return &x; }
AsArray()203 const T* AsArray() const { return &x; }
204
Vec3()205 Vec3() {}
Vec3(const T a[3])206 Vec3(const T a[3]) : x(a[0]), y(a[1]), z(a[2]) {}
Vec3(const T & _x,const T & _y,const T & _z)207 Vec3(const T& _x, const T& _y, const T& _z) : x(_x), y(_y), z(_z) {}
Vec3(const Vec2<T> & _xy,const T & _z)208 Vec3(const Vec2<T>& _xy, const T& _z) : x(_xy.x), y(_xy.y), z(_z) {}
209 #if defined(_M_SSE)
Vec3(const __m128 & _vec)210 Vec3(const __m128 &_vec) : vec(_vec) {}
Vec3(const __m128i & _ivec)211 Vec3(const __m128i &_ivec) : ivec(_ivec) {}
Vec3(const Vec3Packed<T> & _xyz)212 Vec3(const Vec3Packed<T> &_xyz) {
213 vec = _mm_loadu_ps(_xyz.AsArray());
214 }
215 #else
Vec3(const Vec3Packed<T> & _xyz)216 Vec3(const Vec3Packed<T> &_xyz) : x(_xyz.x), y(_xyz.y), z(_xyz.z) {}
217 #endif
218
219 template<typename T2>
Cast()220 Vec3<T2> Cast() const
221 {
222 return Vec3<T2>((T2)x, (T2)y, (T2)z);
223 }
224
225 // Only implemented for T=int and T=float
226 static Vec3 FromRGB(unsigned int rgb);
227 unsigned int ToRGB() const; // alpha bits set to zero
228
AssignToAll(const T & f)229 static Vec3 AssignToAll(const T& f)
230 {
231 return Vec3<T>(f, f, f);
232 }
233
Write(T a[3])234 void Write(T a[3])
235 {
236 a[0] = x; a[1] = y; a[2] = z;
237 }
238
239 Vec3 operator +(const Vec3 &other) const
240 {
241 return Vec3(x+other.x, y+other.y, z+other.z);
242 }
243 void operator += (const Vec3 &other)
244 {
245 x+=other.x; y+=other.y; z+=other.z;
246 }
247 Vec3 operator -(const Vec3 &other) const
248 {
249 return Vec3(x-other.x, y-other.y, z-other.z);
250 }
251 void operator -= (const Vec3 &other)
252 {
253 x-=other.x; y-=other.y; z-=other.z;
254 }
255 Vec3 operator -() const
256 {
257 return Vec3(-x,-y,-z);
258 }
259 Vec3 operator * (const Vec3 &other) const
260 {
261 return Vec3(x*other.x, y*other.y, z*other.z);
262 }
263 template<typename V>
264 Vec3 operator * (const V& f) const
265 {
266 return Vec3(x*f,y*f,z*f);
267 }
268 template<typename V>
269 void operator *= (const V& f)
270 {
271 x*=f; y*=f; z*=f;
272 }
273 template<typename V>
274 Vec3 operator / (const V& f) const
275 {
276 return Vec3(x/f,y/f,z/f);
277 }
278 template<typename V>
279 void operator /= (const V& f)
280 {
281 *this = *this / f;
282 }
283
Length2()284 T Length2() const
285 {
286 return x*x + y*y + z*z;
287 }
288
Clamp(const T & l,const T & h)289 Vec3 Clamp(const T &l, const T &h) const
290 {
291 return Vec3(VecClamp(x, l, h), VecClamp(y, l, h), VecClamp(z, l, h));
292 }
293
294 // Only implemented for T=float
295 float Length() const;
296 void SetLength(const float l);
297 Vec3 WithLength(const float l) const;
298 float Distance2To(Vec3 &other);
299 Vec3 Normalized(bool useSSE4 = false) const;
300 Vec3 NormalizedOr001(bool useSSE4 = false) const;
301 float Normalize(); // returns the previous length, which is often useful
302 float NormalizeOr001();
303
304 T& operator [] (int i) //allow vector[2] = 3 (vector.z=3)
305 {
306 return *((&x) + i);
307 }
308 T operator [] (const int i) const
309 {
310 return *((&x) + i);
311 }
312
SetZero()313 void SetZero()
314 {
315 x=0; y=0; z=0;
316 }
317
318 // Common aliases: UVW (texel coordinates), RGB (colors), STQ (texture coordinates)
u()319 T& u() { return x; }
v()320 T& v() { return y; }
w()321 T& w() { return z; }
322
r()323 T& r() { return x; }
g()324 T& g() { return y; }
b()325 T& b() { return z; }
326
s()327 T& s() { return x; }
t()328 T& t() { return y; }
q()329 T& q() { return z; }
330
u()331 const T& u() const { return x; }
v()332 const T& v() const { return y; }
w()333 const T& w() const { return z; }
334
r()335 const T& r() const { return x; }
g()336 const T& g() const { return y; }
b()337 const T& b() const { return z; }
338
s()339 const T& s() const { return x; }
t()340 const T& t() const { return y; }
q()341 const T& q() const { return z; }
342
343 // swizzlers - create a subvector of specific components
344 // e.g. Vec2 uv() { return Vec2(x,y); }
345 // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx)
346 #define _DEFINE_SWIZZLER2(a, b, name) const Vec2<T> name() const { return Vec2<T>(a, b); }
347 #define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \
348 _DEFINE_SWIZZLER2(a, b, a##b); \
349 _DEFINE_SWIZZLER2(a, b, a2##b2); \
350 _DEFINE_SWIZZLER2(a, b, a3##b3); \
351 _DEFINE_SWIZZLER2(a, b, a4##b4); \
352 _DEFINE_SWIZZLER2(b, a, b##a); \
353 _DEFINE_SWIZZLER2(b, a, b2##a2); \
354 _DEFINE_SWIZZLER2(b, a, b3##a3); \
355 _DEFINE_SWIZZLER2(b, a, b4##a4);
356
357 DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t);
358 DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q);
359 DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q);
360 #undef DEFINE_SWIZZLER2
361 #undef _DEFINE_SWIZZLER2
362 };
363
364 template<typename T>
365 class Vec3Packed
366 {
367 public:
368 union
369 {
370 struct
371 {
372 T x,y,z;
373 };
374 };
375
AsArray()376 T* AsArray() { return &x; }
AsArray()377 const T* AsArray() const { return &x; }
378
Vec3Packed()379 Vec3Packed() {}
Vec3Packed(const T a[3])380 Vec3Packed(const T a[3]) : x(a[0]), y(a[1]), z(a[2]) {}
Vec3Packed(const T & _x,const T & _y,const T & _z)381 Vec3Packed(const T& _x, const T& _y, const T& _z) : x(_x), y(_y), z(_z) {}
Vec3Packed(const Vec2<T> & _xy,const T & _z)382 Vec3Packed(const Vec2<T>& _xy, const T& _z) : x(_xy.x), y(_xy.y), z(_z) {}
Vec3Packed(const Vec3<T> & _xyz)383 Vec3Packed(const Vec3<T>& _xyz) {
384 memcpy(&x, _xyz.AsArray(), sizeof(float) * 3);
385 }
386
387 template<typename T2>
Cast()388 Vec3Packed<T2> Cast() const
389 {
390 return Vec3Packed<T2>((T2)x, (T2)y, (T2)z);
391 }
392
393 // Only implemented for T=int and T=float
394 static Vec3Packed FromRGB(unsigned int rgb);
395 unsigned int ToRGB() const; // alpha bits set to zero
396
AssignToAll(const T & f)397 static Vec3Packed AssignToAll(const T& f)
398 {
399 return Vec3Packed<T>(f, f, f);
400 }
401
Write(T a[3])402 void Write(T a[3])
403 {
404 a[0] = x; a[1] = y; a[2] = z;
405 }
406
407 Vec3Packed operator +(const Vec3Packed &other) const
408 {
409 return Vec3Packed(x+other.x, y+other.y, z+other.z);
410 }
411 void operator += (const Vec3Packed &other)
412 {
413 x+=other.x; y+=other.y; z+=other.z;
414 }
415 Vec3Packed operator -(const Vec3Packed &other) const
416 {
417 return Vec3Packed(x-other.x, y-other.y, z-other.z);
418 }
419 void operator -= (const Vec3Packed &other)
420 {
421 x-=other.x; y-=other.y; z-=other.z;
422 }
423 Vec3Packed operator -() const
424 {
425 return Vec3Packed(-x,-y,-z);
426 }
427 Vec3Packed operator * (const Vec3Packed &other) const
428 {
429 return Vec3Packed(x*other.x, y*other.y, z*other.z);
430 }
431 template<typename V>
432 Vec3Packed operator * (const V& f) const
433 {
434 return Vec3Packed(x*f,y*f,z*f);
435 }
436 template<typename V>
437 void operator *= (const V& f)
438 {
439 x*=f; y*=f; z*=f;
440 }
441 template<typename V>
442 Vec3Packed operator / (const V& f) const
443 {
444 return Vec3Packed(x/f,y/f,z/f);
445 }
446 template<typename V>
447 void operator /= (const V& f)
448 {
449 *this = *this / f;
450 }
451
Length2()452 T Length2() const
453 {
454 return x*x + y*y + z*z;
455 }
456
Clamp(const T & l,const T & h)457 Vec3Packed Clamp(const T &l, const T &h) const
458 {
459 return Vec3Packed(VecClamp(x, l, h), VecClamp(y, l, h), VecClamp(z, l, h));
460 }
461
462 // Only implemented for T=float
463 float Length() const;
464 void SetLength(const float l);
465 Vec3Packed WithLength(const float l) const;
466 float Distance2To(Vec3Packed &other);
467 Vec3Packed Normalized() const;
468 float Normalize(); // returns the previous length, which is often useful
469
470 T& operator [] (int i) //allow vector[2] = 3 (vector.z=3)
471 {
472 return *((&x) + i);
473 }
474 T operator [] (const int i) const
475 {
476 return *((&x) + i);
477 }
478
SetZero()479 void SetZero()
480 {
481 x=0; y=0; z=0;
482 }
483
484 // Common aliases: UVW (texel coordinates), RGB (colors), STQ (texture coordinates)
u()485 T& u() { return x; }
v()486 T& v() { return y; }
w()487 T& w() { return z; }
488
r()489 T& r() { return x; }
g()490 T& g() { return y; }
b()491 T& b() { return z; }
492
s()493 T& s() { return x; }
t()494 T& t() { return y; }
q()495 T& q() { return z; }
496
u()497 const T& u() const { return x; }
v()498 const T& v() const { return y; }
w()499 const T& w() const { return z; }
500
r()501 const T& r() const { return x; }
g()502 const T& g() const { return y; }
b()503 const T& b() const { return z; }
504
s()505 const T& s() const { return x; }
t()506 const T& t() const { return y; }
q()507 const T& q() const { return z; }
508
509 // swizzlers - create a subvector of specific components
510 // e.g. Vec2 uv() { return Vec2(x,y); }
511 // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx)
512 #define _DEFINE_SWIZZLER2(a, b, name) const Vec2<T> name() const { return Vec2<T>(a, b); }
513 #define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \
514 _DEFINE_SWIZZLER2(a, b, a##b); \
515 _DEFINE_SWIZZLER2(a, b, a2##b2); \
516 _DEFINE_SWIZZLER2(a, b, a3##b3); \
517 _DEFINE_SWIZZLER2(a, b, a4##b4); \
518 _DEFINE_SWIZZLER2(b, a, b##a); \
519 _DEFINE_SWIZZLER2(b, a, b2##a2); \
520 _DEFINE_SWIZZLER2(b, a, b3##a3); \
521 _DEFINE_SWIZZLER2(b, a, b4##a4);
522
523 DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t);
524 DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q);
525 DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q);
526 #undef DEFINE_SWIZZLER2
527 #undef _DEFINE_SWIZZLER2
528 };
529
530 template<typename T>
531 class Vec4
532 {
533 public:
534 union
535 {
536 struct
537 {
538 T x,y,z,w;
539 };
540 #if defined(_M_SSE)
541 __m128i ivec;
542 __m128 vec;
543 #endif
544 };
545
AsArray()546 T* AsArray() { return &x; }
AsArray()547 const T* AsArray() const { return &x; }
548
Vec4()549 Vec4() {}
Vec4(const T a[4])550 Vec4(const T a[4]) : x(a[0]), y(a[1]), z(a[2]), w(a[3]) {}
Vec4(const T & _x,const T & _y,const T & _z,const T & _w)551 Vec4(const T& _x, const T& _y, const T& _z, const T& _w) : x(_x), y(_y), z(_z), w(_w) {}
Vec4(const Vec2<T> & _xy,const T & _z,const T & _w)552 Vec4(const Vec2<T>& _xy, const T& _z, const T& _w) : x(_xy.x), y(_xy.y), z(_z), w(_w) {}
Vec4(const Vec3<T> & _xyz,const T & _w)553 Vec4(const Vec3<T>& _xyz, const T& _w) : x(_xyz.x), y(_xyz.y), z(_xyz.z), w(_w) {}
554 #if defined(_M_SSE)
Vec4(const __m128 & _vec)555 Vec4(const __m128 &_vec) : vec(_vec) {}
Vec4(const __m128i & _ivec)556 Vec4(const __m128i &_ivec) : ivec(_ivec) {}
557 #endif
558
559 template<typename T2>
Cast()560 Vec4<T2> Cast() const
561 {
562 return Vec4<T2>((T2)x, (T2)y, (T2)z, (T2)w);
563 }
564
565 // Only implemented for T=int and T=float
566 static Vec4 FromRGBA(unsigned int rgba);
567 static Vec4 FromRGBA(const u8 *rgba);
568 unsigned int ToRGBA() const;
569 void ToRGBA(u8 *rgba) const;
570
AssignToAll(const T & f)571 static Vec4 AssignToAll(const T& f)
572 {
573 return Vec4<T>(f, f, f, f);
574 }
575
Write(T a[4])576 void Write(T a[4])
577 {
578 a[0] = x; a[1] = y; a[2] = z; a[3] = w;
579 }
580
581 Vec4 operator +(const Vec4& other) const
582 {
583 return Vec4(x+other.x, y+other.y, z+other.z, w+other.w);
584 }
585 void operator += (const Vec4& other)
586 {
587 x+=other.x; y+=other.y; z+=other.z; w+=other.w;
588 }
589 Vec4 operator -(const Vec4 &other) const
590 {
591 return Vec4(x-other.x, y-other.y, z-other.z, w-other.w);
592 }
593 void operator -= (const Vec4 &other)
594 {
595 x-=other.x; y-=other.y; z-=other.z; w-=other.w;
596 }
597 Vec4 operator -() const
598 {
599 return Vec4(-x,-y,-z,-w);
600 }
601 Vec4 operator * (const Vec4 &other) const
602 {
603 return Vec4(x*other.x, y*other.y, z*other.z, w*other.w);
604 }
605 Vec4 operator | (const Vec4 &other) const
606 {
607 return Vec4(x | other.x, y | other.y, z | other.z, w | other.w);
608 }
609 template<typename V>
610 Vec4 operator * (const V& f) const
611 {
612 return Vec4(x*f,y*f,z*f,w*f);
613 }
614 template<typename V>
615 void operator *= (const V& f)
616 {
617 x*=f; y*=f; z*=f; w*=f;
618 }
619 template<typename V>
620 Vec4 operator / (const V& f) const
621 {
622 return Vec4(x/f,y/f,z/f,w/f);
623 }
624 template<typename V>
625 void operator /= (const V& f)
626 {
627 *this = *this / f;
628 }
629
630 bool operator ==(const Vec4 &other) const {
631 return x == other.x && y == other.y && z == other.z && w == other.w;
632 }
633
Length2()634 T Length2() const
635 {
636 return x*x + y*y + z*z + w*w;
637 }
638
Clamp(const T & l,const T & h)639 Vec4 Clamp(const T &l, const T &h) const
640 {
641 return Vec4(VecClamp(x, l, h), VecClamp(y, l, h), VecClamp(z, l, h), VecClamp(w, l, h));
642 }
643
Reciprocal()644 Vec4 Reciprocal() const
645 {
646 const T one = 1.0f;
647 return Vec4(one / x, one / y, one / z, one / w);
648 }
649
650 // Only implemented for T=float
651 float Length() const;
652 void SetLength(const float l);
653 Vec4 WithLength(const float l) const;
654 float Distance2To(Vec4 &other);
655 Vec4 Normalized() const;
656 float Normalize(); // returns the previous length, which is often useful
657
658 T& operator [] (int i) //allow vector[2] = 3 (vector.z=3)
659 {
660 return *((&x) + i);
661 }
662 T operator [] (const int i) const
663 {
664 return *((&x) + i);
665 }
666
SetZero()667 void SetZero()
668 {
669 x=0; y=0; z=0; w=0;
670 }
671
672 // Common alias: RGBA (colors)
r()673 T& r() { return x; }
g()674 T& g() { return y; }
b()675 T& b() { return z; }
a()676 T& a() { return w; }
677
r()678 const T& r() const { return x; }
g()679 const T& g() const { return y; }
b()680 const T& b() const { return z; }
a()681 const T& a() const { return w; }
682
683 // swizzlers - create a subvector of specific components
684 // e.g. Vec2 uv() { return Vec2(x,y); }
685 // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx)
686 #define _DEFINE_SWIZZLER2(a, b, name) const Vec2<T> name() const { return Vec2<T>(a, b); }
687 #define DEFINE_SWIZZLER2(a, b, a2, b2) \
688 _DEFINE_SWIZZLER2(a, b, a##b); \
689 _DEFINE_SWIZZLER2(a, b, a2##b2); \
690 _DEFINE_SWIZZLER2(b, a, b##a); \
691 _DEFINE_SWIZZLER2(b, a, b2##a2);
692
693 DEFINE_SWIZZLER2(x, y, r, g);
694 DEFINE_SWIZZLER2(x, z, r, b);
695 DEFINE_SWIZZLER2(x, w, r, a);
696 DEFINE_SWIZZLER2(y, z, g, b);
697 DEFINE_SWIZZLER2(y, w, g, a);
698 DEFINE_SWIZZLER2(z, w, b, a);
699 #undef DEFINE_SWIZZLER2
700 #undef _DEFINE_SWIZZLER2
701
702 #define _DEFINE_SWIZZLER3(a, b, c, name) const Vec3<T> name() const { return Vec3<T>(a, b, c); }
703 #define DEFINE_SWIZZLER3(a, b, c, a2, b2, c2) \
704 _DEFINE_SWIZZLER3(a, b, c, a##b##c); \
705 _DEFINE_SWIZZLER3(a, c, b, a##c##b); \
706 _DEFINE_SWIZZLER3(b, a, c, b##a##c); \
707 _DEFINE_SWIZZLER3(b, c, a, b##c##a); \
708 _DEFINE_SWIZZLER3(c, a, b, c##a##b); \
709 _DEFINE_SWIZZLER3(c, b, a, c##b##a); \
710 _DEFINE_SWIZZLER3(a, b, c, a2##b2##c2); \
711 _DEFINE_SWIZZLER3(a, c, b, a2##c2##b2); \
712 _DEFINE_SWIZZLER3(b, a, c, b2##a2##c2); \
713 _DEFINE_SWIZZLER3(b, c, a, b2##c2##a2); \
714 _DEFINE_SWIZZLER3(c, a, b, c2##a2##b2); \
715 _DEFINE_SWIZZLER3(c, b, a, c2##b2##a2);
716
717 DEFINE_SWIZZLER3(x, y, z, r, g, b);
718 DEFINE_SWIZZLER3(x, y, w, r, g, a);
719 DEFINE_SWIZZLER3(x, z, w, r, b, a);
720 DEFINE_SWIZZLER3(y, z, w, g, b, a);
721 #undef DEFINE_SWIZZLER3
722 #undef _DEFINE_SWIZZLER3
723 };
724
725
726 template<typename BaseType>
727 class Mat3x3
728 {
729 public:
730 // Convention: first three values = first column
Mat3x3(const BaseType values[])731 Mat3x3(const BaseType values[])
732 {
733 for (unsigned int i = 0; i < 3*3; ++i)
734 {
735 this->values[i] = values[i];
736 }
737 }
738
Mat3x3(BaseType _00,BaseType _01,BaseType _02,BaseType _10,BaseType _11,BaseType _12,BaseType _20,BaseType _21,BaseType _22)739 Mat3x3(BaseType _00, BaseType _01, BaseType _02, BaseType _10, BaseType _11, BaseType _12, BaseType _20, BaseType _21, BaseType _22)
740 {
741 values[0] = _00;
742 values[1] = _01;
743 values[2] = _02;
744 values[3] = _10;
745 values[4] = _11;
746 values[5] = _12;
747 values[6] = _20;
748 values[7] = _21;
749 values[8] = _22;
750 }
751
752 template<typename T>
753 Vec3<T> operator * (const Vec3<T>& vec) const
754 {
755 Vec3<T> ret;
756 ret.x = values[0]*vec.x + values[3]*vec.y + values[6]*vec.z;
757 ret.y = values[1]*vec.x + values[4]*vec.y + values[7]*vec.z;
758 ret.z = values[2]*vec.x + values[5]*vec.y + values[8]*vec.z;
759 return ret;
760 }
761
Inverse()762 Mat3x3 Inverse() const
763 {
764 float a = values[0];
765 float b = values[1];
766 float c = values[2];
767 float d = values[3];
768 float e = values[4];
769 float f = values[5];
770 float g = values[6];
771 float h = values[7];
772 float i = values[8];
773 return Mat3x3(e*i-f*h, f*g-d*i, d*h-e*g,
774 c*h-b*i, a*i-c*g, b*g-a*h,
775 b*f-c*e, c*d-a*f, a*e-b*d) / Det();
776 }
777
Det()778 BaseType Det() const
779 {
780 return values[0]*values[4]*values[8] + values[3]*values[7]*values[2] +
781 values[6]*values[1]*values[5] - values[2]*values[4]*values[6] -
782 values[5]*values[7]*values[0] - values[8]*values[1]*values[3];
783 }
784
785 Mat3x3 operator / (const BaseType& val) const
786 {
787 return Mat3x3(values[0]/val, values[1]/val, values[2]/val,
788 values[3]/val, values[4]/val, values[5]/val,
789 values[6]/val, values[7]/val, values[8]/val);
790 }
791
792 private:
793 BaseType values[3*3];
794 };
795
796
797 template<typename BaseType>
798 class Mat4x4
799 {
800 public:
801 // Convention: first four values in arrow = first column
Mat4x4(const BaseType values[])802 Mat4x4(const BaseType values[])
803 {
804 for (unsigned int i = 0; i < 4*4; ++i)
805 {
806 this->values[i] = values[i];
807 }
808 }
809
810 template<typename T>
811 Vec4<T> operator * (const Vec4<T>& vec) const
812 {
813 Vec4<T> ret;
814 ret.x = values[0]*vec.x + values[4]*vec.y + values[8]*vec.z + values[12]*vec.w;
815 ret.y = values[1]*vec.x + values[5]*vec.y + values[9]*vec.z + values[13]*vec.w;
816 ret.z = values[2]*vec.x + values[6]*vec.y + values[10]*vec.z + values[14]*vec.w;
817 ret.w = values[3]*vec.x + values[7]*vec.y + values[11]*vec.z + values[15]*vec.w;
818 return ret;
819 }
820
821 private:
822 BaseType values[4*4];
823 };
824
825 }; // namespace Math3D
826
// Global shorthand names for the float instantiations of the Math3D vector templates.
typedef Math3D::Vec2<float> Vec2f;
typedef Math3D::Vec3<float> Vec3f;
typedef Math3D::Vec3Packed<float> Vec3Packedf;
typedef Math3D::Vec4<float> Vec4f;
831
// Multiplies the 3-vector v by the 12-float matrix m (three values per
// column) and adds m[9..11] as an offset; the result goes to vecOut.
// v and vecOut must point to different memory.
inline void Vec3ByMatrix43(float vecOut[3], const float v[3], const float m[12]) {
	for (int row = 0; row < 3; row++) {
		vecOut[row] = v[0] * m[row] + v[1] * m[3 + row] + v[2] * m[6 + row] + m[9 + row];
	}
}
838
// Multiplies the 3-vector v by the 16-float matrix m (four values per
// column), adding the last column m[12..15] unscaled, and writes the four
// resulting values to vecOut.
inline void Vec3ByMatrix44(float vecOut[4], const float v[3], const float m[16])
{
	for (int row = 0; row < 4; row++) {
		vecOut[row] = v[0] * m[row] + v[1] * m[4 + row] + v[2] * m[8 + row] + m[12 + row];
	}
}
846
// Multiplies the 4-vector v by the 16-float matrix m (four values per
// column) and writes the result to vecOut.
inline void Vec4ByMatrix44(float vecOut[4], const float v[4], const float m[16])
{
	for (int row = 0; row < 4; row++) {
		vecOut[row] = v[0] * m[row] + v[1] * m[4 + row] + v[2] * m[8 + row] + v[3] * m[12 + row];
	}
}
854
855
// Multiplies the 3-vector v by the first nine values of m (three per
// column). Unlike Vec3ByMatrix43, m[9..11] is not added.
inline void Norm3ByMatrix43(float vecOut[3], const float v[3], const float m[12])
{
	for (int row = 0; row < 3; row++) {
		vecOut[row] = v[0] * m[row] + v[1] * m[3 + row] + v[2] * m[6 + row];
	}
}
862
// Multiplies two 16-float matrices into out by forwarding to
// fast_matrix_mul_4x4. Note that the operands are handed over in swapped
// order (b first, then a).
// NOTE(review): presumably this reconciles the operand convention of
// fast_matrix_mul_4x4 with the one used here - confirm against its definition.
inline void Matrix4ByMatrix4(float out[16], const float a[16], const float b[16]) {
	fast_matrix_mul_4x4(out, b, a);
}
866
// Expands a 12-float matrix (four columns of three values) into a 16-float
// matrix (four columns of four values). The added fourth value of each
// column is 0, except for the last column where it is 1.
inline void ConvertMatrix4x3To4x4(float *m4x4, const float *m4x3) {
	for (int col = 0; col < 4; col++) {
		float *dst = m4x4 + col * 4;
		const float *src = m4x3 + col * 3;
		dst[0] = src[0];
		dst[1] = src[1];
		dst[2] = src[2];
		dst[3] = (col == 3) ? 1.0f : 0.0f;
	}
}
885
// Expands a 12-float matrix (four columns of three values) into a 16-float
// matrix while transposing it: output group r holds value r of each of the
// four input columns. The final group of four is set to 0, 0, 0, 1.
inline void ConvertMatrix4x3To4x4Transposed(float *m4x4, const float *m4x3) {
	for (int row = 0; row < 3; row++) {
		for (int col = 0; col < 4; col++) {
			m4x4[row * 4 + col] = m4x3[col * 3 + row];
		}
	}
	m4x4[12] = 0.0f;
	m4x4[13] = 0.0f;
	m4x4[14] = 0.0f;
	m4x4[15] = 1.0f;
}
904
// Transposes a 12-float matrix. Input indices laid out as
//   0369
//   147A
//   258B
// are written out as
//   0123
//   4567
//   89AB
// i.e. output group r holds value r of each of the four input columns.
// Only the first 12 floats of the destination are written.
// Plain scalar copies; no SIMD variant is provided here.
inline void ConvertMatrix4x3To3x4Transposed(float *m4x4, const float *m4x3) {
	for (int row = 0; row < 3; row++) {
		for (int col = 0; col < 4; col++) {
			m4x4[row * 4 + col] = m4x3[col * 3 + row];
		}
	}
}
927
// Writes the transpose of the 16-float matrix `in` to `out`.
// Elements are written in ascending index order, reading in[j * 4 + i]
// for out[i * 4 + j].
inline void Transpose4x4(float out[16], const float in[16]) {
	for (int idx = 0; idx < 16; idx++) {
		const int i = idx / 4;
		const int j = idx % 4;
		out[idx] = in[j * 4 + i];
	}
}
935
// Dot product of two 3-element float arrays.
inline float Vec3Dot(const float v1[3], const float v2[3])
{
	const float xx = v1[0] * v2[0];
	const float yy = v1[1] * v2[1];
	const float zz = v1[2] * v2[2];
	return xx + yy + zz;
}
940
941 namespace Math3D {
942
943 template<typename T>
Dot(const Vec2<T> & a,const Vec2<T> & b)944 inline T Dot(const Vec2<T>& a, const Vec2<T>& b)
945 {
946 return a.x*b.x + a.y*b.y;
947 }
948
949 template<typename T>
Dot(const Vec3<T> & a,const Vec3<T> & b)950 inline T Dot(const Vec3<T>& a, const Vec3<T>& b)
951 {
952 return a.x*b.x + a.y*b.y + a.z*b.z;
953 }
954
955 template<typename T>
Dot(const Vec4<T> & a,const Vec4<T> & b)956 inline T Dot(const Vec4<T>& a, const Vec4<T>& b)
957 {
958 return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w;
959 }
960
961 template<typename T>
Cross(const Vec3<T> & a,const Vec3<T> & b)962 inline Vec3<T> Cross(const Vec3<T>& a, const Vec3<T>& b)
963 {
964 return Vec3<T>(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
965 }
966
967 template<typename T>
Cross(const Vec3Packed<T> & a,const Vec3Packed<T> & b)968 inline Vec3Packed<T> Cross(const Vec3Packed<T>& a, const Vec3Packed<T>& b)
969 {
970 return Vec3Packed<T>(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
971 }
972
973 template<>
FromRGB(unsigned int rgb)974 inline Vec3<float> Vec3<float>::FromRGB(unsigned int rgb)
975 {
976 #if defined(_M_SSE)
977 __m128i z = _mm_setzero_si128();
978 __m128i c = _mm_cvtsi32_si128(rgb);
979 c = _mm_unpacklo_epi16(_mm_unpacklo_epi8(c, z), z);
980 return Vec3<float>(_mm_mul_ps(_mm_cvtepi32_ps(c), _mm_set_ps1(1.0f / 255.0f)));
981 #else
982 return Vec3((rgb & 0xFF) * (1.0f/255.0f),
983 ((rgb >> 8) & 0xFF) * (1.0f/255.0f),
984 ((rgb >> 16) & 0xFF) * (1.0f/255.0f));
985 #endif
986 }
987
988 template<>
FromRGB(unsigned int rgb)989 inline Vec3<int> Vec3<int>::FromRGB(unsigned int rgb)
990 {
991 #if defined(_M_SSE)
992 __m128i z = _mm_setzero_si128();
993 __m128i c = _mm_cvtsi32_si128(rgb);
994 c = _mm_unpacklo_epi16(_mm_unpacklo_epi8(c, z), z);
995 return Vec3<int>(c);
996 #else
997 return Vec3(rgb & 0xFF, (rgb >> 8) & 0xFF, (rgb >> 16) & 0xFF);
998 #endif
999 }
1000
1001 template<>
ToRGB()1002 __forceinline unsigned int Vec3<float>::ToRGB() const
1003 {
1004 #if defined(_M_SSE)
1005 __m128i c = _mm_cvtps_epi32(_mm_mul_ps(vec, _mm_set_ps1(255.0f)));
1006 __m128i c16 = _mm_packs_epi32(c, c);
1007 return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16)) & 0x00FFFFFF;
1008 #else
1009 return (clamp_u8((int)(r() * 255.f)) << 0) |
1010 (clamp_u8((int)(g() * 255.f)) << 8) |
1011 (clamp_u8((int)(b() * 255.f)) << 16);
1012 #endif
1013 }
1014
1015 template<>
ToRGB()1016 __forceinline unsigned int Vec3<int>::ToRGB() const
1017 {
1018 #if defined(_M_SSE)
1019 __m128i c16 = _mm_packs_epi32(ivec, ivec);
1020 return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16)) & 0x00FFFFFF;
1021 #else
1022 return clamp_u8(r()) | (clamp_u8(g()) << 8) | (clamp_u8(b()) << 16);
1023 #endif
1024 }
1025
1026 template<>
FromRGBA(unsigned int rgba)1027 inline Vec4<float> Vec4<float>::FromRGBA(unsigned int rgba)
1028 {
1029 #if defined(_M_SSE)
1030 __m128i z = _mm_setzero_si128();
1031 __m128i c = _mm_cvtsi32_si128(rgba);
1032 c = _mm_unpacklo_epi16(_mm_unpacklo_epi8(c, z), z);
1033 return Vec4<float>(_mm_mul_ps(_mm_cvtepi32_ps(c), _mm_set_ps1(1.0f / 255.0f)));
1034 #else
1035 return Vec4((rgba & 0xFF) * (1.0f/255.0f),
1036 ((rgba >> 8) & 0xFF) * (1.0f/255.0f),
1037 ((rgba >> 16) & 0xFF) * (1.0f/255.0f),
1038 ((rgba >> 24) & 0xFF) * (1.0f/255.0f));
1039 #endif
1040 }
1041
1042 template<typename T>
FromRGBA(const u8 * rgba)1043 inline Vec4<T> Vec4<T>::FromRGBA(const u8 *rgba)
1044 {
1045 return Vec4<T>::FromRGBA(*(unsigned int *)rgba);
1046 }
1047
1048 template<>
FromRGBA(unsigned int rgba)1049 inline Vec4<int> Vec4<int>::FromRGBA(unsigned int rgba)
1050 {
1051 #if defined(_M_SSE)
1052 __m128i z = _mm_setzero_si128();
1053 __m128i c = _mm_cvtsi32_si128(rgba);
1054 c = _mm_unpacklo_epi16(_mm_unpacklo_epi8(c, z), z);
1055 return Vec4<int>(c);
1056 #else
1057 return Vec4(rgba & 0xFF, (rgba >> 8) & 0xFF, (rgba >> 16) & 0xFF, (rgba >> 24) & 0xFF);
1058 #endif
1059 }
1060
1061 template<>
ToRGBA()1062 __forceinline unsigned int Vec4<float>::ToRGBA() const
1063 {
1064 #if defined(_M_SSE)
1065 __m128i c = _mm_cvtps_epi32(_mm_mul_ps(vec, _mm_set_ps1(255.0f)));
1066 __m128i c16 = _mm_packs_epi32(c, c);
1067 return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16));
1068 #else
1069 return (clamp_u8((int)(r() * 255.f)) << 0) |
1070 (clamp_u8((int)(g() * 255.f)) << 8) |
1071 (clamp_u8((int)(b() * 255.f)) << 16) |
1072 (clamp_u8((int)(a() * 255.f)) << 24);
1073 #endif
1074 }
1075
1076 template<>
ToRGBA()1077 __forceinline unsigned int Vec4<int>::ToRGBA() const
1078 {
1079 #if defined(_M_SSE)
1080 __m128i c16 = _mm_packs_epi32(ivec, ivec);
1081 return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16));
1082 #else
1083 return clamp_u8(r()) | (clamp_u8(g()) << 8) | (clamp_u8(b()) << 16) | (clamp_u8(a()) << 24);
1084 #endif
1085 }
1086
1087 template<typename T>
ToRGBA(u8 * rgba)1088 __forceinline void Vec4<T>::ToRGBA(u8 *rgba) const
1089 {
1090 *(u32 *)rgba = ToRGBA();
1091 }
1092
1093 #if defined(_M_SSE)
1094 // Specialized for SIMD optimization
1095
1096 // Vec3<float> operation
1097 template<>
1098 inline void Vec3<float>::operator += (const Vec3<float> &other)
1099 {
1100 vec = _mm_add_ps(vec, other.vec);
1101 }
1102
1103 template<>
1104 inline Vec3<float> Vec3<float>::operator + (const Vec3 &other) const
1105 {
1106 return Vec3<float>(_mm_add_ps(vec, other.vec));
1107 }
1108
1109 template<>
1110 inline Vec3<float> Vec3<float>::operator * (const Vec3 &other) const
1111 {
1112 return Vec3<float>(_mm_mul_ps(vec, other.vec));
1113 }
1114
1115 template<> template<>
1116 inline Vec3<float> Vec3<float>::operator * (const float &other) const
1117 {
1118 return Vec3<float>(_mm_mul_ps(vec, _mm_set_ps1(other)));
1119 }
1120
1121 // Vec4<float> operation
1122 template<>
1123 inline void Vec4<float>::operator += (const Vec4<float> &other)
1124 {
1125 vec = _mm_add_ps(vec, other.vec);
1126 }
1127
1128 template<>
1129 inline Vec4<float> Vec4<float>::operator + (const Vec4 &other) const
1130 {
1131 return Vec4<float>(_mm_add_ps(vec, other.vec));
1132 }
1133
1134 template<>
1135 inline Vec4<float> Vec4<float>::operator * (const Vec4 &other) const
1136 {
1137 return Vec4<float>(_mm_mul_ps(vec, other.vec));
1138 }
1139
1140 template<> template<>
1141 inline Vec4<float> Vec4<float>::operator * (const float &other) const
1142 {
1143 return Vec4<float>(_mm_mul_ps(vec, _mm_set_ps1(other)));
1144 }
1145
1146 // Vec3<float> cross product
1147 template<>
Cross(const Vec3<float> & a,const Vec3<float> & b)1148 inline Vec3<float> Cross(const Vec3<float> &a, const Vec3<float> &b)
1149 {
1150 const __m128 left = _mm_mul_ps(_mm_shuffle_ps(a.vec, a.vec, _MM_SHUFFLE(3, 0, 2, 1)), _mm_shuffle_ps(b.vec, b.vec, _MM_SHUFFLE(3, 1, 0, 2)));
1151 const __m128 right = _mm_mul_ps(_mm_shuffle_ps(a.vec, a.vec, _MM_SHUFFLE(3, 1, 0, 2)), _mm_shuffle_ps(b.vec, b.vec, _MM_SHUFFLE(3, 0, 2, 1)));
1152 return _mm_sub_ps(left, right);
1153 }
1154 #endif
1155
1156 }; // namespace Math3D
1157
// Linear interpolation with a float weight: t = 0.0 yields begin,
// t = 1.0 yields end. X must support multiplication by float and addition.
template<typename X>
inline X Lerp(const X &begin, const X &end, const float t)
{
	const float beginWeight = 1.f - t;
	return begin * beginWeight + end * t;
}
1164
// Linear interpolation with an integer weight: t = 0 yields begin,
// t = base yields end. The weighted sum is divided by base at the end.
template<typename X, int base>
inline X LerpInt(const X &begin, const X &end, const int t)
{
	const int beginWeight = base - t;
	return (begin * beginWeight + end * t) / base;
}
1171