1 // Copyright (c) 2012- PPSSPP Project.
2 
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6 
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 // GNU General Public License 2.0 for more details.
11 
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14 
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17 
18 #pragma once
19 
#include <cmath>
#include <cstring>

#include "Common/Common.h"
#include "Core/Util/AudioFormat.h"  // for clamp_u8
#include "Common/Math/fast/fast_matrix.h"

#if defined(_M_SSE)
#include <emmintrin.h>
#if _M_SSE >= 0x401
#include <smmintrin.h>
#endif
#endif
32 
33 namespace Math3D {
34 
// Clamp helper shared by the Vec classes' Clamp() members.
// Returns high when v > high, low when v < low, and v otherwise.
// The upper bound is checked first.
template<typename T>
inline static T VecClamp(const T &v, const T &low, const T &high)
{
	if (v > high) {
		return high;
	}
	return (v < low) ? low : v;
}
45 
// Two-component vector of T. x and y live in an anonymous union which, when
// _M_SSE is defined, also exposes the same storage as __m128 / __m128i.
template<typename T>
class Vec2
{
public:
	union
	{
		struct
		{
			T x,y;
		};
#if defined(_M_SSE)
		__m128i ivec;
		__m128 vec;
#endif
	};

	// Pointer to x; the components are read as a contiguous array from here.
	T* AsArray() { return &x; }
	const T* AsArray() const { return &x; }

	// Default construction leaves the components uninitialized.
	Vec2() {}
	Vec2(const T a[2]) : x(a[0]), y(a[1]) {}
	Vec2(const T& _x, const T& _y) : x(_x), y(_y) {}
#if defined(_M_SSE)
	Vec2(const __m128 &_vec) : vec(_vec) {}
	Vec2(const __m128i &_ivec) : ivec(_ivec) {}
#endif

	// Converts each component to T2 with a C-style cast.
	template<typename T2>
	Vec2<T2> Cast() const {
		return Vec2<T2>((T2)x, (T2)y);
	}

	// Builds a vector whose components are all f.
	static Vec2 AssignToAll(const T& f) {
		return Vec2<T>(f, f);
	}

	// Stores x and y into a[0] and a[1].
	void Write(T a[2]) {
		a[0] = x;
		a[1] = y;
	}

	// Component-wise arithmetic with another vector.
	Vec2 operator +(const Vec2& rhs) const {
		return Vec2(x + rhs.x, y + rhs.y);
	}
	void operator += (const Vec2 &rhs) {
		x += rhs.x;
		y += rhs.y;
	}
	Vec2 operator -(const Vec2& rhs) const {
		return Vec2(x - rhs.x, y - rhs.y);
	}
	void operator -= (const Vec2& rhs) {
		x -= rhs.x;
		y -= rhs.y;
	}
	Vec2 operator -() const {
		return Vec2(-x, -y);
	}
	Vec2 operator * (const Vec2& rhs) const {
		return Vec2(x * rhs.x, y * rhs.y);
	}

	// Scaling by a scalar of arbitrary type V; results are converted back to T.
	template<typename V>
	Vec2 operator * (const V& scalar) const {
		return Vec2(x * scalar, y * scalar);
	}
	template<typename V>
	void operator *= (const V& scalar) {
		x *= scalar;
		y *= scalar;
	}
	template<typename V>
	Vec2 operator / (const V& scalar) const {
		return Vec2(x / scalar, y / scalar);
	}
	template<typename V>
	void operator /= (const V& scalar) {
		*this = *this / scalar;
	}

	// Squared length (sum of squared components).
	T Length2() const {
		return x*x + y*y;
	}

	// Clamps every component to [l, h] via VecClamp.
	Vec2 Clamp(const T &l, const T &h) const {
		return Vec2(VecClamp(x, l, h), VecClamp(y, l, h));
	}

	// Only implemented for T=float
	float Length() const;
	void SetLength(const float l);
	Vec2 WithLength(const float l) const;
	float Distance2To(Vec2 &other);
	Vec2 Normalized() const;
	float Normalize(); // returns the previous length, which is often useful

	// Index access: vector[1] = 3 is the same as vector.y = 3. No bounds check.
	T& operator [] (int i) {
		return AsArray()[i];
	}
	T operator [] (const int i) const {
		return AsArray()[i];
	}

	// Sets both components to 0.
	void SetZero() {
		x = 0;
		y = 0;
	}

	// Common aliases: UV (texel coordinates), ST (texture coordinates)
	T& u() { return x; }
	T& v() { return y; }
	T& s() { return x; }
	T& t() { return y; }

	const T& u() const { return x; }
	const T& v() const { return y; }
	const T& s() const { return x; }
	const T& t() const { return y; }

	// swizzlers - all three return the components in swapped order (y, x)
	const Vec2 yx() const { return Vec2(y, x); }
	const Vec2 vu() const { return yx(); }
	const Vec2 ts() const { return yx(); }
};
182 
183 template<typename T>
184 class Vec3Packed;
185 
186 template<typename T>
187 class Vec3
188 {
189 public:
190 	union
191 	{
192 		struct
193 		{
194 			T x,y,z;
195 		};
196 #if defined(_M_SSE)
197 		__m128i ivec;
198 		__m128 vec;
199 #endif
200 	};
201 
AsArray()202 	T* AsArray() { return &x; }
AsArray()203 	const T* AsArray() const { return &x; }
204 
Vec3()205 	Vec3() {}
Vec3(const T a[3])206 	Vec3(const T a[3]) : x(a[0]), y(a[1]), z(a[2]) {}
Vec3(const T & _x,const T & _y,const T & _z)207 	Vec3(const T& _x, const T& _y, const T& _z) : x(_x), y(_y), z(_z) {}
Vec3(const Vec2<T> & _xy,const T & _z)208 	Vec3(const Vec2<T>& _xy, const T& _z) : x(_xy.x), y(_xy.y), z(_z) {}
209 #if defined(_M_SSE)
Vec3(const __m128 & _vec)210 	Vec3(const __m128 &_vec) : vec(_vec) {}
Vec3(const __m128i & _ivec)211 	Vec3(const __m128i &_ivec) : ivec(_ivec) {}
Vec3(const Vec3Packed<T> & _xyz)212 	Vec3(const Vec3Packed<T> &_xyz) {
213 		vec = _mm_loadu_ps(_xyz.AsArray());
214 	}
215 #else
Vec3(const Vec3Packed<T> & _xyz)216 	Vec3(const Vec3Packed<T> &_xyz) : x(_xyz.x), y(_xyz.y), z(_xyz.z) {}
217 #endif
218 
219 	template<typename T2>
Cast()220 	Vec3<T2> Cast() const
221 	{
222 		return Vec3<T2>((T2)x, (T2)y, (T2)z);
223 	}
224 
225 	// Only implemented for T=int and T=float
226 	static Vec3 FromRGB(unsigned int rgb);
227 	unsigned int ToRGB() const; // alpha bits set to zero
228 
AssignToAll(const T & f)229 	static Vec3 AssignToAll(const T& f)
230 	{
231 		return Vec3<T>(f, f, f);
232 	}
233 
Write(T a[3])234 	void Write(T a[3])
235 	{
236 		a[0] = x; a[1] = y; a[2] = z;
237 	}
238 
239 	Vec3 operator +(const Vec3 &other) const
240 	{
241 		return Vec3(x+other.x, y+other.y, z+other.z);
242 	}
243 	void operator += (const Vec3 &other)
244 	{
245 		x+=other.x; y+=other.y; z+=other.z;
246 	}
247 	Vec3 operator -(const Vec3 &other) const
248 	{
249 		return Vec3(x-other.x, y-other.y, z-other.z);
250 	}
251 	void operator -= (const Vec3 &other)
252 	{
253 		x-=other.x; y-=other.y; z-=other.z;
254 	}
255 	Vec3 operator -() const
256 	{
257 		return Vec3(-x,-y,-z);
258 	}
259 	Vec3 operator * (const Vec3 &other) const
260 	{
261 		return Vec3(x*other.x, y*other.y, z*other.z);
262 	}
263 	template<typename V>
264 	Vec3 operator * (const V& f) const
265 	{
266 		return Vec3(x*f,y*f,z*f);
267 	}
268 	template<typename V>
269 	void operator *= (const V& f)
270 	{
271 		x*=f; y*=f; z*=f;
272 	}
273 	template<typename V>
274 	Vec3 operator / (const V& f) const
275 	{
276 		return Vec3(x/f,y/f,z/f);
277 	}
278 	template<typename V>
279 	void operator /= (const V& f)
280 	{
281 		*this = *this / f;
282 	}
283 
Length2()284 	T Length2() const
285 	{
286 		return x*x + y*y + z*z;
287 	}
288 
Clamp(const T & l,const T & h)289 	Vec3 Clamp(const T &l, const T &h) const
290 	{
291 		return Vec3(VecClamp(x, l, h), VecClamp(y, l, h), VecClamp(z, l, h));
292 	}
293 
294 	// Only implemented for T=float
295 	float Length() const;
296 	void SetLength(const float l);
297 	Vec3 WithLength(const float l) const;
298 	float Distance2To(Vec3 &other);
299 	Vec3 Normalized(bool useSSE4 = false) const;
300 	Vec3 NormalizedOr001(bool useSSE4 = false) const;
301 	float Normalize(); // returns the previous length, which is often useful
302 	float NormalizeOr001();
303 
304 	T& operator [] (int i) //allow vector[2] = 3   (vector.z=3)
305 	{
306 		return *((&x) + i);
307 	}
308 	T operator [] (const int i) const
309 	{
310 		return *((&x) + i);
311 	}
312 
SetZero()313 	void SetZero()
314 	{
315 		x=0; y=0; z=0;
316 	}
317 
318 	// Common aliases: UVW (texel coordinates), RGB (colors), STQ (texture coordinates)
u()319 	T& u() { return x; }
v()320 	T& v() { return y; }
w()321 	T& w() { return z; }
322 
r()323 	T& r() { return x; }
g()324 	T& g() { return y; }
b()325 	T& b() { return z; }
326 
s()327 	T& s() { return x; }
t()328 	T& t() { return y; }
q()329 	T& q() { return z; }
330 
u()331 	const T& u() const { return x; }
v()332 	const T& v() const { return y; }
w()333 	const T& w() const { return z; }
334 
r()335 	const T& r() const { return x; }
g()336 	const T& g() const { return y; }
b()337 	const T& b() const { return z; }
338 
s()339 	const T& s() const { return x; }
t()340 	const T& t() const { return y; }
q()341 	const T& q() const { return z; }
342 
343 	// swizzlers - create a subvector of specific components
344 	// e.g. Vec2 uv() { return Vec2(x,y); }
345 	// _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx)
346 #define _DEFINE_SWIZZLER2(a, b, name) const Vec2<T> name() const { return Vec2<T>(a, b); }
347 #define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \
348 	_DEFINE_SWIZZLER2(a, b, a##b); \
349 	_DEFINE_SWIZZLER2(a, b, a2##b2); \
350 	_DEFINE_SWIZZLER2(a, b, a3##b3); \
351 	_DEFINE_SWIZZLER2(a, b, a4##b4); \
352 	_DEFINE_SWIZZLER2(b, a, b##a); \
353 	_DEFINE_SWIZZLER2(b, a, b2##a2); \
354 	_DEFINE_SWIZZLER2(b, a, b3##a3); \
355 	_DEFINE_SWIZZLER2(b, a, b4##a4);
356 
357 	DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t);
358 	DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q);
359 	DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q);
360 #undef DEFINE_SWIZZLER2
361 #undef _DEFINE_SWIZZLER2
362 };
363 
364 template<typename T>
365 class Vec3Packed
366 {
367 public:
368 	union
369 	{
370 		struct
371 		{
372 			T x,y,z;
373 		};
374 	};
375 
AsArray()376 	T* AsArray() { return &x; }
AsArray()377 	const T* AsArray() const { return &x; }
378 
Vec3Packed()379 	Vec3Packed() {}
Vec3Packed(const T a[3])380 	Vec3Packed(const T a[3]) : x(a[0]), y(a[1]), z(a[2]) {}
Vec3Packed(const T & _x,const T & _y,const T & _z)381 	Vec3Packed(const T& _x, const T& _y, const T& _z) : x(_x), y(_y), z(_z) {}
Vec3Packed(const Vec2<T> & _xy,const T & _z)382 	Vec3Packed(const Vec2<T>& _xy, const T& _z) : x(_xy.x), y(_xy.y), z(_z) {}
Vec3Packed(const Vec3<T> & _xyz)383 	Vec3Packed(const Vec3<T>& _xyz) {
384 		memcpy(&x, _xyz.AsArray(), sizeof(float) * 3);
385 	}
386 
387 	template<typename T2>
Cast()388 	Vec3Packed<T2> Cast() const
389 	{
390 		return Vec3Packed<T2>((T2)x, (T2)y, (T2)z);
391 	}
392 
393 	// Only implemented for T=int and T=float
394 	static Vec3Packed FromRGB(unsigned int rgb);
395 	unsigned int ToRGB() const; // alpha bits set to zero
396 
AssignToAll(const T & f)397 	static Vec3Packed AssignToAll(const T& f)
398 	{
399 		return Vec3Packed<T>(f, f, f);
400 	}
401 
Write(T a[3])402 	void Write(T a[3])
403 	{
404 		a[0] = x; a[1] = y; a[2] = z;
405 	}
406 
407 	Vec3Packed operator +(const Vec3Packed &other) const
408 	{
409 		return Vec3Packed(x+other.x, y+other.y, z+other.z);
410 	}
411 	void operator += (const Vec3Packed &other)
412 	{
413 		x+=other.x; y+=other.y; z+=other.z;
414 	}
415 	Vec3Packed operator -(const Vec3Packed &other) const
416 	{
417 		return Vec3Packed(x-other.x, y-other.y, z-other.z);
418 	}
419 	void operator -= (const Vec3Packed &other)
420 	{
421 		x-=other.x; y-=other.y; z-=other.z;
422 	}
423 	Vec3Packed operator -() const
424 	{
425 		return Vec3Packed(-x,-y,-z);
426 	}
427 	Vec3Packed operator * (const Vec3Packed &other) const
428 	{
429 		return Vec3Packed(x*other.x, y*other.y, z*other.z);
430 	}
431 	template<typename V>
432 	Vec3Packed operator * (const V& f) const
433 	{
434 		return Vec3Packed(x*f,y*f,z*f);
435 	}
436 	template<typename V>
437 	void operator *= (const V& f)
438 	{
439 		x*=f; y*=f; z*=f;
440 	}
441 	template<typename V>
442 	Vec3Packed operator / (const V& f) const
443 	{
444 		return Vec3Packed(x/f,y/f,z/f);
445 	}
446 	template<typename V>
447 	void operator /= (const V& f)
448 	{
449 		*this = *this / f;
450 	}
451 
Length2()452 	T Length2() const
453 	{
454 		return x*x + y*y + z*z;
455 	}
456 
Clamp(const T & l,const T & h)457 	Vec3Packed Clamp(const T &l, const T &h) const
458 	{
459 		return Vec3Packed(VecClamp(x, l, h), VecClamp(y, l, h), VecClamp(z, l, h));
460 	}
461 
462 	// Only implemented for T=float
463 	float Length() const;
464 	void SetLength(const float l);
465 	Vec3Packed WithLength(const float l) const;
466 	float Distance2To(Vec3Packed &other);
467 	Vec3Packed Normalized() const;
468 	float Normalize(); // returns the previous length, which is often useful
469 
470 	T& operator [] (int i) //allow vector[2] = 3   (vector.z=3)
471 	{
472 		return *((&x) + i);
473 	}
474 	T operator [] (const int i) const
475 	{
476 		return *((&x) + i);
477 	}
478 
SetZero()479 	void SetZero()
480 	{
481 		x=0; y=0; z=0;
482 	}
483 
484 	// Common aliases: UVW (texel coordinates), RGB (colors), STQ (texture coordinates)
u()485 	T& u() { return x; }
v()486 	T& v() { return y; }
w()487 	T& w() { return z; }
488 
r()489 	T& r() { return x; }
g()490 	T& g() { return y; }
b()491 	T& b() { return z; }
492 
s()493 	T& s() { return x; }
t()494 	T& t() { return y; }
q()495 	T& q() { return z; }
496 
u()497 	const T& u() const { return x; }
v()498 	const T& v() const { return y; }
w()499 	const T& w() const { return z; }
500 
r()501 	const T& r() const { return x; }
g()502 	const T& g() const { return y; }
b()503 	const T& b() const { return z; }
504 
s()505 	const T& s() const { return x; }
t()506 	const T& t() const { return y; }
q()507 	const T& q() const { return z; }
508 
509 	// swizzlers - create a subvector of specific components
510 	// e.g. Vec2 uv() { return Vec2(x,y); }
511 	// _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx)
512 #define _DEFINE_SWIZZLER2(a, b, name) const Vec2<T> name() const { return Vec2<T>(a, b); }
513 #define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \
514 	_DEFINE_SWIZZLER2(a, b, a##b); \
515 	_DEFINE_SWIZZLER2(a, b, a2##b2); \
516 	_DEFINE_SWIZZLER2(a, b, a3##b3); \
517 	_DEFINE_SWIZZLER2(a, b, a4##b4); \
518 	_DEFINE_SWIZZLER2(b, a, b##a); \
519 	_DEFINE_SWIZZLER2(b, a, b2##a2); \
520 	_DEFINE_SWIZZLER2(b, a, b3##a3); \
521 	_DEFINE_SWIZZLER2(b, a, b4##a4);
522 
523 	DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t);
524 	DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q);
525 	DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q);
526 #undef DEFINE_SWIZZLER2
527 #undef _DEFINE_SWIZZLER2
528 };
529 
530 template<typename T>
531 class Vec4
532 {
533 public:
534 	union
535 	{
536 		struct
537 		{
538 			T x,y,z,w;
539 		};
540 #if defined(_M_SSE)
541 		__m128i ivec;
542 		__m128 vec;
543 #endif
544 	};
545 
AsArray()546 	T* AsArray() { return &x; }
AsArray()547 	const T* AsArray() const { return &x; }
548 
Vec4()549 	Vec4() {}
Vec4(const T a[4])550 	Vec4(const T a[4]) : x(a[0]), y(a[1]), z(a[2]), w(a[3]) {}
Vec4(const T & _x,const T & _y,const T & _z,const T & _w)551 	Vec4(const T& _x, const T& _y, const T& _z, const T& _w) : x(_x), y(_y), z(_z), w(_w) {}
Vec4(const Vec2<T> & _xy,const T & _z,const T & _w)552 	Vec4(const Vec2<T>& _xy, const T& _z, const T& _w) : x(_xy.x), y(_xy.y), z(_z), w(_w) {}
Vec4(const Vec3<T> & _xyz,const T & _w)553 	Vec4(const Vec3<T>& _xyz, const T& _w) : x(_xyz.x), y(_xyz.y), z(_xyz.z), w(_w) {}
554 #if defined(_M_SSE)
Vec4(const __m128 & _vec)555 	Vec4(const __m128 &_vec) : vec(_vec) {}
Vec4(const __m128i & _ivec)556 	Vec4(const __m128i &_ivec) : ivec(_ivec) {}
557 #endif
558 
559 	template<typename T2>
Cast()560 	Vec4<T2> Cast() const
561 	{
562 		return Vec4<T2>((T2)x, (T2)y, (T2)z, (T2)w);
563 	}
564 
565 	// Only implemented for T=int and T=float
566 	static Vec4 FromRGBA(unsigned int rgba);
567 	static Vec4 FromRGBA(const u8 *rgba);
568 	unsigned int ToRGBA() const;
569 	void ToRGBA(u8 *rgba) const;
570 
AssignToAll(const T & f)571 	static Vec4 AssignToAll(const T& f)
572 	{
573 		return Vec4<T>(f, f, f, f);
574 	}
575 
Write(T a[4])576 	void Write(T a[4])
577 	{
578 		a[0] = x; a[1] = y; a[2] = z; a[3] = w;
579 	}
580 
581 	Vec4 operator +(const Vec4& other) const
582 	{
583 		return Vec4(x+other.x, y+other.y, z+other.z, w+other.w);
584 	}
585 	void operator += (const Vec4& other)
586 	{
587 		x+=other.x; y+=other.y; z+=other.z; w+=other.w;
588 	}
589 	Vec4 operator -(const Vec4 &other) const
590 	{
591 		return Vec4(x-other.x, y-other.y, z-other.z, w-other.w);
592 	}
593 	void operator -= (const Vec4 &other)
594 	{
595 		x-=other.x; y-=other.y; z-=other.z; w-=other.w;
596 	}
597 	Vec4 operator -() const
598 	{
599 		return Vec4(-x,-y,-z,-w);
600 	}
601 	Vec4 operator * (const Vec4 &other) const
602 	{
603 		return Vec4(x*other.x, y*other.y, z*other.z, w*other.w);
604 	}
605 	Vec4 operator | (const Vec4 &other) const
606 	{
607 		return Vec4(x | other.x, y | other.y, z | other.z, w | other.w);
608 	}
609 	template<typename V>
610 	Vec4 operator * (const V& f) const
611 	{
612 		return Vec4(x*f,y*f,z*f,w*f);
613 	}
614 	template<typename V>
615 	void operator *= (const V& f)
616 	{
617 		x*=f; y*=f; z*=f; w*=f;
618 	}
619 	template<typename V>
620 	Vec4 operator / (const V& f) const
621 	{
622 		return Vec4(x/f,y/f,z/f,w/f);
623 	}
624 	template<typename V>
625 	void operator /= (const V& f)
626 	{
627 		*this = *this / f;
628 	}
629 
630 	bool operator ==(const Vec4 &other) const {
631 		return x == other.x && y == other.y && z == other.z && w == other.w;
632 	}
633 
Length2()634 	T Length2() const
635 	{
636 		return x*x + y*y + z*z + w*w;
637 	}
638 
Clamp(const T & l,const T & h)639 	Vec4 Clamp(const T &l, const T &h) const
640 	{
641 		return Vec4(VecClamp(x, l, h), VecClamp(y, l, h), VecClamp(z, l, h), VecClamp(w, l, h));
642 	}
643 
Reciprocal()644 	Vec4 Reciprocal() const
645 	{
646 		const T one = 1.0f;
647 		return Vec4(one / x, one / y, one / z, one / w);
648 	}
649 
650 	// Only implemented for T=float
651 	float Length() const;
652 	void SetLength(const float l);
653 	Vec4 WithLength(const float l) const;
654 	float Distance2To(Vec4 &other);
655 	Vec4 Normalized() const;
656 	float Normalize(); // returns the previous length, which is often useful
657 
658 	T& operator [] (int i) //allow vector[2] = 3   (vector.z=3)
659 	{
660 		return *((&x) + i);
661 	}
662 	T operator [] (const int i) const
663 	{
664 		return *((&x) + i);
665 	}
666 
SetZero()667 	void SetZero()
668 	{
669 		x=0; y=0; z=0; w=0;
670 	}
671 
672 	// Common alias: RGBA (colors)
r()673 	T& r() { return x; }
g()674 	T& g() { return y; }
b()675 	T& b() { return z; }
a()676 	T& a() { return w; }
677 
r()678 	const T& r() const { return x; }
g()679 	const T& g() const { return y; }
b()680 	const T& b() const { return z; }
a()681 	const T& a() const { return w; }
682 
683 	// swizzlers - create a subvector of specific components
684 	// e.g. Vec2 uv() { return Vec2(x,y); }
685 	// _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx)
686 #define _DEFINE_SWIZZLER2(a, b, name) const Vec2<T> name() const { return Vec2<T>(a, b); }
687 #define DEFINE_SWIZZLER2(a, b, a2, b2) \
688 	_DEFINE_SWIZZLER2(a, b, a##b); \
689 	_DEFINE_SWIZZLER2(a, b, a2##b2); \
690 	_DEFINE_SWIZZLER2(b, a, b##a); \
691 	_DEFINE_SWIZZLER2(b, a, b2##a2);
692 
693 	DEFINE_SWIZZLER2(x, y, r, g);
694 	DEFINE_SWIZZLER2(x, z, r, b);
695 	DEFINE_SWIZZLER2(x, w, r, a);
696 	DEFINE_SWIZZLER2(y, z, g, b);
697 	DEFINE_SWIZZLER2(y, w, g, a);
698 	DEFINE_SWIZZLER2(z, w, b, a);
699 #undef DEFINE_SWIZZLER2
700 #undef _DEFINE_SWIZZLER2
701 
702 #define _DEFINE_SWIZZLER3(a, b, c, name) const Vec3<T> name() const { return Vec3<T>(a, b, c); }
703 #define DEFINE_SWIZZLER3(a, b, c, a2, b2, c2) \
704 	_DEFINE_SWIZZLER3(a, b, c, a##b##c); \
705 	_DEFINE_SWIZZLER3(a, c, b, a##c##b); \
706 	_DEFINE_SWIZZLER3(b, a, c, b##a##c); \
707 	_DEFINE_SWIZZLER3(b, c, a, b##c##a); \
708 	_DEFINE_SWIZZLER3(c, a, b, c##a##b); \
709 	_DEFINE_SWIZZLER3(c, b, a, c##b##a); \
710 	_DEFINE_SWIZZLER3(a, b, c, a2##b2##c2); \
711 	_DEFINE_SWIZZLER3(a, c, b, a2##c2##b2); \
712 	_DEFINE_SWIZZLER3(b, a, c, b2##a2##c2); \
713 	_DEFINE_SWIZZLER3(b, c, a, b2##c2##a2); \
714 	_DEFINE_SWIZZLER3(c, a, b, c2##a2##b2); \
715 	_DEFINE_SWIZZLER3(c, b, a, c2##b2##a2);
716 
717 	DEFINE_SWIZZLER3(x, y, z, r, g, b);
718 	DEFINE_SWIZZLER3(x, y, w, r, g, a);
719 	DEFINE_SWIZZLER3(x, z, w, r, b, a);
720 	DEFINE_SWIZZLER3(y, z, w, g, b, a);
721 #undef DEFINE_SWIZZLER3
722 #undef _DEFINE_SWIZZLER3
723 };
724 
725 
726 template<typename BaseType>
727 class Mat3x3
728 {
729 public:
730 	// Convention: first three values = first column
Mat3x3(const BaseType values[])731 	Mat3x3(const BaseType values[])
732 	{
733 		for (unsigned int i = 0; i < 3*3; ++i)
734 		{
735 			this->values[i] = values[i];
736 		}
737 	}
738 
Mat3x3(BaseType _00,BaseType _01,BaseType _02,BaseType _10,BaseType _11,BaseType _12,BaseType _20,BaseType _21,BaseType _22)739 	Mat3x3(BaseType _00, BaseType _01, BaseType _02, BaseType _10, BaseType _11, BaseType _12, BaseType _20, BaseType _21, BaseType _22)
740 	{
741 		values[0] = _00;
742 		values[1] = _01;
743 		values[2] = _02;
744 		values[3] = _10;
745 		values[4] = _11;
746 		values[5] = _12;
747 		values[6] = _20;
748 		values[7] = _21;
749 		values[8] = _22;
750 	}
751 
752 	template<typename T>
753 	Vec3<T> operator * (const Vec3<T>& vec) const
754 	{
755 		Vec3<T> ret;
756 		ret.x = values[0]*vec.x + values[3]*vec.y + values[6]*vec.z;
757 		ret.y = values[1]*vec.x + values[4]*vec.y + values[7]*vec.z;
758 		ret.z = values[2]*vec.x + values[5]*vec.y + values[8]*vec.z;
759 		return ret;
760 	}
761 
Inverse()762 	Mat3x3 Inverse() const
763 	{
764 		float a = values[0];
765 		float b = values[1];
766 		float c = values[2];
767 		float d = values[3];
768 		float e = values[4];
769 		float f = values[5];
770 		float g = values[6];
771 		float h = values[7];
772 		float i = values[8];
773 		return Mat3x3(e*i-f*h, f*g-d*i, d*h-e*g,
774 						c*h-b*i, a*i-c*g, b*g-a*h,
775 						b*f-c*e, c*d-a*f, a*e-b*d) / Det();
776 	}
777 
Det()778 	BaseType Det() const
779 	{
780 		return values[0]*values[4]*values[8] + values[3]*values[7]*values[2] +
781 				values[6]*values[1]*values[5] - values[2]*values[4]*values[6] -
782 				values[5]*values[7]*values[0] - values[8]*values[1]*values[3];
783 	}
784 
785 	Mat3x3 operator / (const BaseType& val) const
786 	{
787 		return Mat3x3(values[0]/val, values[1]/val, values[2]/val,
788 						values[3]/val, values[4]/val, values[5]/val,
789 						values[6]/val, values[7]/val, values[8]/val);
790 	}
791 
792 private:
793 	BaseType values[3*3];
794 };
795 
796 
797 template<typename BaseType>
798 class Mat4x4
799 {
800 public:
801 	// Convention: first four values in arrow = first column
Mat4x4(const BaseType values[])802 	Mat4x4(const BaseType values[])
803 	{
804 		for (unsigned int i = 0; i < 4*4; ++i)
805 		{
806 			this->values[i] = values[i];
807 		}
808 	}
809 
810 	template<typename T>
811 	Vec4<T> operator * (const Vec4<T>& vec) const
812 	{
813 		Vec4<T> ret;
814 		ret.x = values[0]*vec.x + values[4]*vec.y + values[8]*vec.z + values[12]*vec.w;
815 		ret.y = values[1]*vec.x + values[5]*vec.y + values[9]*vec.z + values[13]*vec.w;
816 		ret.z = values[2]*vec.x + values[6]*vec.y + values[10]*vec.z + values[14]*vec.w;
817 		ret.w = values[3]*vec.x + values[7]*vec.y + values[11]*vec.z + values[15]*vec.w;
818 		return ret;
819 	}
820 
821 private:
822 	BaseType values[4*4];
823 };
824 
825 }; // namespace Math3D
826 
// Global-scope shorthand names for the float instantiations of the Math3D vector classes.
typedef Math3D::Vec2<float> Vec2f;
typedef Math3D::Vec3<float> Vec3f;
typedef Math3D::Vec3Packed<float> Vec3Packedf;
typedef Math3D::Vec4<float> Vec4f;
831 
// Transforms the 3-vector v by the 12-float matrix m: m[0..8] is applied as a
// 3x3 block and m[9..11] is added as a translation.
// v and vecOut must point to different memory.
inline void Vec3ByMatrix43(float vecOut[3], const float v[3], const float m[12]) {
	for (int i = 0; i < 3; i++) {
		vecOut[i] = v[0] * m[i] + v[1] * m[3 + i] + v[2] * m[6 + i] + m[9 + i];
	}
}
838 
// Transforms the 3-vector v by the 16-float matrix m, treating v as if it had
// a fourth component of 1 (m[12..15] is added unscaled). Writes four outputs.
inline void Vec3ByMatrix44(float vecOut[4], const float v[3], const float m[16])
{
	for (int i = 0; i < 4; i++) {
		vecOut[i] = v[0] * m[i] + v[1] * m[4 + i] + v[2] * m[8 + i] + m[12 + i];
	}
}
846 
// Transforms the 4-vector v by the 16-float matrix m; output component i
// combines m[i], m[4 + i], m[8 + i] and m[12 + i].
inline void Vec4ByMatrix44(float vecOut[4], const float v[4], const float m[16])
{
	for (int i = 0; i < 4; i++) {
		vecOut[i] = v[0] * m[i] + v[1] * m[4 + i] + v[2] * m[8 + i] + v[3] * m[12 + i];
	}
}
854 
855 
// Transforms the 3-vector v by the first nine floats of m only; the
// translation part m[9..11] is not applied.
inline void Norm3ByMatrix43(float vecOut[3], const float v[3], const float m[12])
{
	for (int i = 0; i < 3; i++) {
		vecOut[i] = v[0] * m[i] + v[1] * m[3 + i] + v[2] * m[6 + i];
	}
}
862 
// Forwards to fast_matrix_mul_4x4 (presumably a 4x4 matrix product written to out).
// Note that the operands are handed to the helper in swapped order: b first, then a.
inline void Matrix4ByMatrix4(float out[16], const float a[16], const float b[16]) {
	fast_matrix_mul_4x4(out, b, a);
}
866 
// Expands a 12-float matrix (four groups of three) into 16 floats by appending
// a fourth value to each group: 0 for the first three groups, 1 for the last.
inline void ConvertMatrix4x3To4x4(float *m4x4, const float *m4x3) {
	for (int group = 0; group < 4; group++) {
		float *dst = m4x4 + group * 4;
		const float *src = m4x3 + group * 3;
		dst[0] = src[0];
		dst[1] = src[1];
		dst[2] = src[2];
		dst[3] = (group == 3) ? 1.0f : 0.0f;
	}
}
885 
// Writes the transpose of the 12-float input into the first 12 outputs
// (m4x4[row * 4 + col] = m4x3[col * 3 + row]) and then appends 0, 0, 0, 1.
inline void ConvertMatrix4x3To4x4Transposed(float *m4x4, const float *m4x3) {
	for (int row = 0; row < 3; row++) {
		for (int col = 0; col < 4; col++) {
			m4x4[row * 4 + col] = m4x3[col * 3 + row];
		}
	}
	m4x4[12] = 0.0f;
	m4x4[13] = 0.0f;
	m4x4[14] = 0.0f;
	m4x4[15] = 1.0f;
}
904 
// Index layout of the input (left) and of the output (right):
// 0369
// 147A
// 258B
// ->>-
// 0123
// 4567
// 89AB
// Only 12 floats are written, despite the destination parameter's name.
// Don't see a way to SIMD that. Should be pretty fast anyway.
inline void ConvertMatrix4x3To3x4Transposed(float *m4x4, const float *m4x3) {
	for (int row = 0; row < 3; row++) {
		for (int col = 0; col < 4; col++) {
			m4x4[row * 4 + col] = m4x3[col * 3 + row];
		}
	}
}
927 
// Writes the transpose of the 4x4 matrix `in` to `out`:
// out[i * 4 + j] = in[j * 4 + i].
inline void Transpose4x4(float out[16], const float in[16]) {
	for (int idx = 0; idx < 16; idx++) {
		const int i = idx / 4;
		const int j = idx % 4;
		out[idx] = in[j * 4 + i];
	}
}
935 
// Dot product of two 3-float arrays.
inline float Vec3Dot(const float v1[3], const float v2[3])
{
	const float xx = v1[0]*v2[0];
	const float yy = v1[1]*v2[1];
	const float zz = v1[2]*v2[2];
	return xx + yy + zz;
}
940 
941 namespace Math3D {
942 
943 template<typename T>
Dot(const Vec2<T> & a,const Vec2<T> & b)944 inline T Dot(const Vec2<T>& a, const Vec2<T>& b)
945 {
946 	return a.x*b.x + a.y*b.y;
947 }
948 
949 template<typename T>
Dot(const Vec3<T> & a,const Vec3<T> & b)950 inline T Dot(const Vec3<T>& a, const Vec3<T>& b)
951 {
952 	return a.x*b.x + a.y*b.y + a.z*b.z;
953 }
954 
955 template<typename T>
Dot(const Vec4<T> & a,const Vec4<T> & b)956 inline T Dot(const Vec4<T>& a, const Vec4<T>& b)
957 {
958 	return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w;
959 }
960 
961 template<typename T>
Cross(const Vec3<T> & a,const Vec3<T> & b)962 inline Vec3<T> Cross(const Vec3<T>& a, const Vec3<T>& b)
963 {
964 	return Vec3<T>(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
965 }
966 
967 template<typename T>
Cross(const Vec3Packed<T> & a,const Vec3Packed<T> & b)968 inline Vec3Packed<T> Cross(const Vec3Packed<T>& a, const Vec3Packed<T>& b)
969 {
970 	return Vec3Packed<T>(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
971 }
972 
973 template<>
FromRGB(unsigned int rgb)974 inline Vec3<float> Vec3<float>::FromRGB(unsigned int rgb)
975 {
976 #if defined(_M_SSE)
977 	__m128i z = _mm_setzero_si128();
978 	__m128i c = _mm_cvtsi32_si128(rgb);
979 	c = _mm_unpacklo_epi16(_mm_unpacklo_epi8(c, z), z);
980 	return Vec3<float>(_mm_mul_ps(_mm_cvtepi32_ps(c), _mm_set_ps1(1.0f / 255.0f)));
981 #else
982 	return Vec3((rgb & 0xFF) * (1.0f/255.0f),
983 				((rgb >> 8) & 0xFF) * (1.0f/255.0f),
984 				((rgb >> 16) & 0xFF) * (1.0f/255.0f));
985 #endif
986 }
987 
988 template<>
FromRGB(unsigned int rgb)989 inline Vec3<int> Vec3<int>::FromRGB(unsigned int rgb)
990 {
991 #if defined(_M_SSE)
992 	__m128i z = _mm_setzero_si128();
993 	__m128i c = _mm_cvtsi32_si128(rgb);
994 	c = _mm_unpacklo_epi16(_mm_unpacklo_epi8(c, z), z);
995 	return Vec3<int>(c);
996 #else
997 	return Vec3(rgb & 0xFF, (rgb >> 8) & 0xFF, (rgb >> 16) & 0xFF);
998 #endif
999 }
1000 
1001 template<>
ToRGB()1002 __forceinline unsigned int Vec3<float>::ToRGB() const
1003 {
1004 #if defined(_M_SSE)
1005 	__m128i c = _mm_cvtps_epi32(_mm_mul_ps(vec, _mm_set_ps1(255.0f)));
1006 	__m128i c16 = _mm_packs_epi32(c, c);
1007 	return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16)) & 0x00FFFFFF;
1008 #else
1009 	return (clamp_u8((int)(r() * 255.f)) << 0) |
1010 			(clamp_u8((int)(g() * 255.f)) << 8) |
1011 			(clamp_u8((int)(b() * 255.f)) << 16);
1012 #endif
1013 }
1014 
1015 template<>
ToRGB()1016 __forceinline unsigned int Vec3<int>::ToRGB() const
1017 {
1018 #if defined(_M_SSE)
1019 	__m128i c16 = _mm_packs_epi32(ivec, ivec);
1020 	return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16)) & 0x00FFFFFF;
1021 #else
1022 	return clamp_u8(r()) | (clamp_u8(g()) << 8) | (clamp_u8(b()) << 16);
1023 #endif
1024 }
1025 
1026 template<>
FromRGBA(unsigned int rgba)1027 inline Vec4<float> Vec4<float>::FromRGBA(unsigned int rgba)
1028 {
1029 #if defined(_M_SSE)
1030 	__m128i z = _mm_setzero_si128();
1031 	__m128i c = _mm_cvtsi32_si128(rgba);
1032 	c = _mm_unpacklo_epi16(_mm_unpacklo_epi8(c, z), z);
1033 	return Vec4<float>(_mm_mul_ps(_mm_cvtepi32_ps(c), _mm_set_ps1(1.0f / 255.0f)));
1034 #else
1035 	return Vec4((rgba & 0xFF) * (1.0f/255.0f),
1036 				((rgba >> 8) & 0xFF) * (1.0f/255.0f),
1037 				((rgba >> 16) & 0xFF) * (1.0f/255.0f),
1038 				((rgba >> 24) & 0xFF) * (1.0f/255.0f));
1039 #endif
1040 }
1041 
1042 template<typename T>
FromRGBA(const u8 * rgba)1043 inline Vec4<T> Vec4<T>::FromRGBA(const u8 *rgba)
1044 {
1045 	return Vec4<T>::FromRGBA(*(unsigned int *)rgba);
1046 }
1047 
1048 template<>
FromRGBA(unsigned int rgba)1049 inline Vec4<int> Vec4<int>::FromRGBA(unsigned int rgba)
1050 {
1051 #if defined(_M_SSE)
1052 	__m128i z = _mm_setzero_si128();
1053 	__m128i c = _mm_cvtsi32_si128(rgba);
1054 	c = _mm_unpacklo_epi16(_mm_unpacklo_epi8(c, z), z);
1055 	return Vec4<int>(c);
1056 #else
1057 	return Vec4(rgba & 0xFF, (rgba >> 8) & 0xFF, (rgba >> 16) & 0xFF, (rgba >> 24) & 0xFF);
1058 #endif
1059 }
1060 
1061 template<>
ToRGBA()1062 __forceinline unsigned int Vec4<float>::ToRGBA() const
1063 {
1064 #if defined(_M_SSE)
1065 	__m128i c = _mm_cvtps_epi32(_mm_mul_ps(vec, _mm_set_ps1(255.0f)));
1066 	__m128i c16 = _mm_packs_epi32(c, c);
1067 	return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16));
1068 #else
1069 	return (clamp_u8((int)(r() * 255.f)) << 0) |
1070 			(clamp_u8((int)(g() * 255.f)) << 8) |
1071 			(clamp_u8((int)(b() * 255.f)) << 16) |
1072 			(clamp_u8((int)(a() * 255.f)) << 24);
1073 #endif
1074 }
1075 
1076 template<>
ToRGBA()1077 __forceinline unsigned int Vec4<int>::ToRGBA() const
1078 {
1079 #if defined(_M_SSE)
1080 	__m128i c16 = _mm_packs_epi32(ivec, ivec);
1081 	return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16));
1082 #else
1083 	return clamp_u8(r()) | (clamp_u8(g()) << 8) | (clamp_u8(b()) << 16) | (clamp_u8(a()) << 24);
1084 #endif
1085 }
1086 
1087 template<typename T>
ToRGBA(u8 * rgba)1088 __forceinline void Vec4<T>::ToRGBA(u8 *rgba) const
1089 {
1090 	*(u32 *)rgba = ToRGBA();
1091 }
1092 
1093 #if defined(_M_SSE)
1094 // Specialized for SIMD optimization
1095 
1096 // Vec3<float> operation
1097 template<>
1098 inline void Vec3<float>::operator += (const Vec3<float> &other)
1099 {
1100 	vec = _mm_add_ps(vec, other.vec);
1101 }
1102 
1103 template<>
1104 inline Vec3<float> Vec3<float>::operator + (const Vec3 &other) const
1105 {
1106 	return Vec3<float>(_mm_add_ps(vec, other.vec));
1107 }
1108 
1109 template<>
1110 inline Vec3<float> Vec3<float>::operator * (const Vec3 &other) const
1111 {
1112 	return Vec3<float>(_mm_mul_ps(vec, other.vec));
1113 }
1114 
1115 template<> template<>
1116 inline Vec3<float> Vec3<float>::operator * (const float &other) const
1117 {
1118 	return Vec3<float>(_mm_mul_ps(vec, _mm_set_ps1(other)));
1119 }
1120 
1121 // Vec4<float> operation
1122 template<>
1123 inline void Vec4<float>::operator += (const Vec4<float> &other)
1124 {
1125 	vec = _mm_add_ps(vec, other.vec);
1126 }
1127 
1128 template<>
1129 inline Vec4<float> Vec4<float>::operator + (const Vec4 &other) const
1130 {
1131 	return Vec4<float>(_mm_add_ps(vec, other.vec));
1132 }
1133 
1134 template<>
1135 inline Vec4<float> Vec4<float>::operator * (const Vec4 &other) const
1136 {
1137 	return Vec4<float>(_mm_mul_ps(vec, other.vec));
1138 }
1139 
1140 template<> template<>
1141 inline Vec4<float> Vec4<float>::operator * (const float &other) const
1142 {
1143 	return Vec4<float>(_mm_mul_ps(vec, _mm_set_ps1(other)));
1144 }
1145 
1146 // Vec3<float> cross product
1147 template<>
Cross(const Vec3<float> & a,const Vec3<float> & b)1148 inline Vec3<float> Cross(const Vec3<float> &a, const Vec3<float> &b)
1149 {
1150 	const __m128 left = _mm_mul_ps(_mm_shuffle_ps(a.vec, a.vec, _MM_SHUFFLE(3, 0, 2, 1)), _mm_shuffle_ps(b.vec, b.vec, _MM_SHUFFLE(3, 1, 0, 2)));
1151 	const __m128 right = _mm_mul_ps(_mm_shuffle_ps(a.vec, a.vec, _MM_SHUFFLE(3, 1, 0, 2)), _mm_shuffle_ps(b.vec, b.vec, _MM_SHUFFLE(3, 0, 2, 1)));
1152 	return _mm_sub_ps(left, right);
1153 }
1154 #endif
1155 
1156 }; // namespace Math3D
1157 
// linear interpolation via float: 0.0=begin, 1.0=end
// Computed as begin*(1-t) + end*t; t is not clamped.
template<typename X>
inline X Lerp(const X& begin, const X& end, const float t)
{
	const float beginWeight = 1.f-t;
	return begin*beginWeight + end*t;
}
1164 
// linear interpolation via int: 0=begin, base=end
// Computed as (begin*(base-t) + end*t) / base; t is not clamped.
template<typename X, int base>
inline X LerpInt(const X& begin, const X& end, const int t)
{
	const int beginWeight = base-t;
	return (begin*beginWeight + end*t) / base;
}
1171