1 #pragma once
2 
// Some of the code in this file consists of snippets from all over the web, esp. dspmusic.org. I think it's all public domain.
4 // In any case, very little of it is used anywhere at the moment.
5 
6 #include <cmath>
7 #include <cstring>
8 #include <cstdint>
9 
typedef unsigned short float16;

// DEPRECATED
// This is NOT a 1.5.10 half float. It is a hack format that simply keeps the upper 16 bits
// of a 32-bit float and chops off the lower 16 (for IEEE-754 binary32 that leaves the sign,
// the full exponent and the top mantissa bits). This choice is subject to change, and the
// format is probably not used for anything at the moment.

// Truncates a float to the hack 16-bit format by keeping only its upper 16 bits.
// The dropped bits are discarded without rounding.
inline float16 FloatToFloat16(float x) {
	static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be 32 bits wide");
	// Use an unsigned temporary so the shift is well defined for negative floats too.
	uint32_t bits;
	std::memcpy(&bits, &x, sizeof(bits));
	return static_cast<float16>(bits >> 16);
}

// Expands the hack 16-bit format back to a float: the 16 stored bits become the upper
// half of the float's bit pattern and the lower half is zero-filled.
inline float Float16ToFloat(float16 ix) {
	static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be 32 bits wide");
	// Widen first. Copying sizeof(float) bytes straight out of the 2-byte float16 would read
	// past the end of the object and would not put the bits in the upper half.
	const uint32_t bits = static_cast<uint32_t>(ix) << 16;
	float x;
	std::memcpy(&x, &bits, sizeof(x));
	return x;
}
26 
// Returns true when at most one bit is set in the two's complement representation of n.
// For positive n this means n is a power of two. Note that 0 also yields true (it has no
// bits set), and so does the most negative int (only the sign bit set); every other
// negative value yields false.
inline bool isPowerOf2(int n) {
	// Work on the unsigned bit pattern so that "n - 1" cannot overflow a signed int.
	const unsigned int bits = static_cast<unsigned int>(n);
	// Clearing the lowest set bit leaves zero only if there was at most one bit set.
	return (bits & (bits - 1u)) == 0u;
}
30 
// Rounds v up to the next power of two; values that already are a power of two are returned
// unchanged. Because the arithmetic wraps at 32 bits, both 0 and anything above 0x80000000
// produce 0.
inline uint32_t RoundUpToPowerOf2(uint32_t v) {
	// Start from v - 1 so that exact powers of two map to themselves.
	uint32_t smeared = v - 1;
	// Copy the highest set bit into every lower position (shifts of 1, 2, 4, 8 and 16).
	for (uint32_t shift = 1; shift <= 16; shift <<= 1)
		smeared |= smeared >> shift;
	// All bits below the top one are now set; adding one carries into the next power of two.
	return smeared + 1;
}
41 
// Integer base-2 logarithm, rounded down: returns the index of the highest set bit of val.
// For val == 0 no bit is set and the result is 0xFFFFFFFF (i.e. (unsigned)-1).
inline uint32_t log2i(uint32_t val) {
	// Starts at "-1" so that the first shift (for bit 0) brings the result to 0.
	unsigned int highest_bit = static_cast<unsigned int>(-1);
	for (; val != 0; val >>= 1)
		++highest_bit;
	return highest_bit;
}
49 
// Pi as single-precision (float) literals.
// M_PI is only defined here when no earlier header has provided it. Note that this fallback
// is a float, whereas the M_PI supplied by many <cmath>/<math.h> implementations is a double,
// so the type of M_PI may differ between platforms. PI is always the float literal.
#ifndef M_PI
#define M_PI 3.141592653589793f
#endif
#define PI 3.141592653589793f
54 
// Returns val limited to the range [floor, cap].
// The upper bound is tested first, so if floor > cap any val above cap still yields cap.
// A val that compares neither above cap nor below floor is returned unchanged.
template<class T>
inline T clamp_value(T val, T floor, T cap) {
	if (val > cap)
		return cap;
	return (val < floor) ? floor : val;
}
64 
// Alignment helpers. Both rely on masking with ~(a - 1), so `a` must be a power of two.
// Being macros, they paste their arguments textually: `a` appears twice in ROUND_UP's
// expansion, so avoid arguments with side effects.

// Rounds x down to the nearest multiple of a.
#define ROUND_DOWN(x, a) ((x) & ~((a) - 1))
// Rounds x up to the nearest multiple of a (x is returned unchanged if already aligned).
#define ROUND_UP(x, a)   ROUND_DOWN((x) + (a) - 1, a)
67 
// In-place clamp: limits *val to the range [min, max].
// The lower bound is checked first; the upper bound is only checked when *val was not below min.
template<class T>
inline void Clamp(T* val, const T& min, const T& max)
{
	T& target = *val;
	if (target < min) {
		target = min;
		return;
	}
	if (target > max)
		target = max;
}

// Value-returning clamp: returns a copy of val limited to [min, max].
// Delegates to the in-place overload above, so both share the same semantics.
template<class T>
inline T Clamp(const T val, const T& min, const T& max)
{
	T clamped = val;
	Clamp(&clamped, min, max);
	return clamped;
}
84 
// Gives access to the same 32 bits either as a raw integer bit pattern or as a float.
// Used by the bit-level float helpers in this file.
union FP32 {
	// Raw bit pattern. This is the first member, so `FP32 x = { bits };` initializes it.
	uint32_t u;
	// The same storage viewed as a float.
	float f;
};
89 
// Wrapper around the raw 16-bit pattern of a half-precision float, as consumed and produced
// by the half<->float conversion helpers in this file.
struct FP16 {
	// Raw bit pattern.
	uint16_t u;
};
93 
my_isinf(float f)94 inline bool my_isinf(float f) {
95 	FP32 f2u;
96 	f2u.f = f;
97 	return f2u.u == 0x7f800000 ||
98 		f2u.u == 0xff800000;
99 }
100 
// Returns true if f is a NaN (quiet or signaling, either sign), determined purely from its
// bit pattern.
inline bool my_isnan(float f) {
	// memcpy is the well-defined way to read a float's bits; reading the inactive member of a
	// union is undefined behaviour in ISO C++.
	uint32_t bits;
	std::memcpy(&bits, &f, sizeof(bits));
	const bool exponent_all_ones = (bits & 0x7F800000u) == 0x7F800000u;
	// NaNs have a non-zero mantissa; a zero mantissa with this exponent is an infinity.
	const bool mantissa_nonzero = (bits & 0x007FFFFFu) != 0;
	return exponent_all_ones && mantissa_nonzero;
}
107 
// Returns true if f is either a NaN or an infinity, determined purely from its bit pattern.
inline bool my_isnanorinf(float f) {
	// memcpy is the well-defined way to read a float's bits; reading the inactive member of a
	// union is undefined behaviour in ISO C++.
	uint32_t bits;
	std::memcpy(&bits, &f, sizeof(bits));
	// NaNs have a non-zero mantissa and infs a zero mantissa, so the mantissa is simply
	// ignored: only the all-ones exponent matters.
	return (bits & 0x7F800000u) == 0x7F800000u;
}
114 
// Returns 1 if d is an even whole number, 0 otherwise (including for non-integral values).
// Works by halving d, dropping the fractional part of the half, and checking whether
// doubling that again reproduces d exactly.
inline int is_even(float d) {
	float whole_half = 0.0f;
	modff(d / 2.0f, &whole_half);
	const bool reproduces_input = (whole_half * 2.0f == d);
	return reproduces_input ? 1 : 0;
}
120 
// Rounds d to the nearest whole number; exact ties (*.5) go to the closest even number.
// Done by hand instead of via rint()/nearbyint(), so the result does not depend on the
// current floating point rounding mode.
// All arithmetic is done in double: going through float would lose the value for inputs
// whose integer part does not fit a float exactly (magnitudes above 2^24) and would turn
// large doubles into infinity.
inline double round_ieee_754(double d) {
	const double whole = std::floor(d);
	// Always in [0, 1) for finite d, and computed exactly.
	const double frac = d - whole;
	if (frac < 0.5)
		return whole;
	if (frac > 0.5)
		return whole + 1.0;
	// Exact tie: keep the lower neighbour if it is even, otherwise step up to the even one.
	// (Non-finite inputs also land here; they come back out as +-inf or NaN.)
	if (std::fmod(whole, 2.0) == 0.0)
		return whole;
	return whole + 1.0;
}
133 
// magic code from ryg: http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
// See also SSE2 version: https://gist.github.com/rygorous/2144712
//
// Converts the 16-bit half float bit pattern in h to a 32-bit float, returned as FP32.
// The half's exponent/mantissa bits are moved into float position and the exponent bias is
// then fixed up with a single float multiplication.
// NOTE(review): denormal halves become denormal floats before the multiply, so this
// presumably relies on denormals not being flushed to zero at that point - confirm against
// the FPU mode set by EnableFZ/FPU_SetFastMode.
inline FP32 half_to_float_fast5(FP16 h)
{
	// Float whose exponent field is 127 + (127 - 15), i.e. the value 2^(127 - 15): multiplying
	// by it rebiases the exponent from the half bias (15) to the float bias (127).
	static const FP32 magic = { (127 + (127 - 15)) << 23 };
	// Float with value 2^16. After the rescale only inputs whose half exponent field was
	// all ones (Inf/NaN) reach this magnitude.
	static const FP32 was_infnan = { (127 + 16) << 23 };
	FP32 o;
	o.u = (h.u & 0x7fff) << 13;     // exponent/mantissa bits
	o.f *= magic.f;                 // exponent adjust
	// Inf/NaN: force the float exponent to all ones. The low 10 bits of the half are kept
	// as-is in the LOW 10 bits of the float mantissa (they are not shifted up by 13).
	if (o.f >= was_infnan.f)        // make sure Inf/NaN survive (retain the low bits)
		o.u = (255 << 23) | (h.u & 0x03ff);
	// Move the sign from bit 15 of the half to bit 31 of the float.
	o.u |= (h.u & 0x8000) << 16;    // sign bit
	return o;
}
148 
ExpandHalf(uint16_t half)149 inline float ExpandHalf(uint16_t half) {
150 	FP16 fp16;
151 	fp16.u = half;
152 	FP32 fp = half_to_float_fast5(fp16);
153 	return fp.f;
154 }
155 
// More magic code: https://gist.github.com/rygorous/2156668
//
// Converts the 32-bit float in f to a 16-bit half float bit pattern, returned as FP16.
// Inf stays Inf, NaN becomes a quiet NaN that keeps the low 10 bits of the float mantissa,
// and finite values too large for a half are clamped to infinity.
inline FP16 float_to_half_fast3(FP32 f)
{
	static const FP32 f32infty = { 255 << 23 };  // float +Inf bit pattern (exponent all ones)
	static const FP32 f16infty = { 31 << 23 };   // half Inf exponent (31), still in float bit position
	static const FP32 magic = { 15 << 23 };      // float with exponent field 15, i.e. 2^(15 - 127)
	static const uint32_t sign_mask = 0x80000000u;
	static const uint32_t round_mask = ~0xfffu;
	FP16 o = { 0 };

	// Strip the sign so everything below works on the magnitude; it is re-applied at the end.
	uint32_t sign = f.u & sign_mask;
	f.u ^= sign;

	if (f.u >= f32infty.u) // Inf or NaN (all exponent bits set)
		o.u = (f.u > f32infty.u) ? (0x7e00 | (f.u & 0x3ff)) : 0x7c00; // NaN->qNaN and Inf->Inf
	else // (De)normalized number or zero
	{
		// Clear the low 12 mantissa bits before rescaling.
		f.u &= round_mask;
		// Rescale so that the exponent field ends up using the half bias.
		f.f *= magic.f;
		// Subtracting ~0xfff is the same as adding 0x1000 (mod 2^32): a rounding bias of half
		// a unit of the lowest bit that survives the >> 13 below.
		f.u -= round_mask;
		if (f.u > f16infty.u) f.u = f16infty.u; // Clamp to signed infinity if overflowed

		o.u = f.u >> 13; // Take the bits!
	}

	// Move the sign from bit 31 of the float to bit 15 of the half.
	o.u |= sign >> 16;
	return o;
}
184 
ShrinkToHalf(float full)185 inline uint16_t ShrinkToHalf(float full) {
186 	FP32 fp32;
187 	fp32.f = full;
188 	FP16 fp = float_to_half_fast3(fp32);
189 	return fp.u;
190 }
191 
// FPU control.
// NOTE(review): only declared here; the definition lives elsewhere. Judging by the name this
// presumably enables the FPU's flush-to-zero (FZ) mode - confirm against the implementation.
void EnableFZ();
194 
// Enables both FZ and Default-NaN. This is documented to flip some ARM implementations into
// a "run-fast" mode where they can schedule VFP instructions on the NEON unit (these
// implementations have very slow VFP units).
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0274h/Babffifj.html
// Only declared here; the definition lives elsewhere.
void FPU_SetFastMode();
200