1 #include "fix16.h"
2
3 #include "pragmas.h"
4 #include "fix16_int64.h"
5
6 /* Subtraction and addition with overflow detection.
7 * The versions without overflow detection are inlined in the header.
8 */
fix16_add(fix16_t a,fix16_t b)9 FIXMATH_FUNC_ATTRS fix16_t fix16_add(fix16_t a, fix16_t b)
10 {
11 // Use unsigned integers because overflow with signed integers is
12 // an undefined operation (http://www.airs.com/blog/archives/120).
13 uint32_t _a = a, _b = b;
14 uint32_t sum = _a + _b;
15
16 // Overflow can only happen if sign of a == sign of b, and then
17 // it causes sign of sum != sign of a.
18 if (!((_a ^ _b) & 0x80000000) && ((_a ^ sum) & 0x80000000))
19 return FIX16_OVERFLOW;
20
21 return sum;
22 }
23
fix16_sub(fix16_t a,fix16_t b)24 FIXMATH_FUNC_ATTRS fix16_t fix16_sub(fix16_t a, fix16_t b)
25 {
26 uint32_t _a = a, _b = b;
27 uint32_t diff = _a - _b;
28
29 // Overflow can only happen if sign of a != sign of b, and then
30 // it causes sign of diff != sign of a.
31 if (((_a ^ _b) & 0x80000000) && ((_a ^ diff) & 0x80000000))
32 return FIX16_OVERFLOW;
33
34 return diff;
35 }
36
37 /* Saturating arithmetic */
fix16_sadd(fix16_t a,fix16_t b)38 FIXMATH_FUNC_ATTRS fix16_t fix16_sadd(fix16_t a, fix16_t b)
39 {
40 fix16_t result = fix16_add(a, b);
41
42 if (result == FIX16_OVERFLOW)
43 return (a >= 0) ? FIX16_MAX : FIX16_MIN;
44
45 return result;
46 }
47
fix16_ssub(fix16_t a,fix16_t b)48 FIXMATH_FUNC_ATTRS fix16_t fix16_ssub(fix16_t a, fix16_t b)
49 {
50 fix16_t result = fix16_sub(a, b);
51
52 if (result == FIX16_OVERFLOW)
53 return (a >= 0) ? FIX16_MAX : FIX16_MIN;
54
55 return result;
56 }
57
58
59
60 /* 64-bit implementation for fix16_mul. Fastest version for e.g. ARM Cortex M3.
61 * Performs a 32*32 -> 64bit multiplication. The middle 32 bits are the result,
62 * bottom 16 bits are used for rounding, and upper 16 bits are used for overflow
63 * detection.
64 */
65
fix16_mul(fix16_t inArg0,fix16_t inArg1)66 FIXMATH_FUNC_ATTRS fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1)
67 {
68 int64_t product = (int64_t)inArg0 * inArg1;
69
70 // The upper 17 bits should all be the same (the sign).
71 uint32_t upper = (product >> 47);
72
73 if (product < 0)
74 {
75 if (~upper)
76 return FIX16_OVERFLOW;
77
78 // This adjustment is required in order to round -1/2 correctly
79 product--;
80 }
81 else
82 {
83 if (upper)
84 return FIX16_OVERFLOW;
85 }
86
87 fix16_t result = product >> 16;
88 result += (product & 0x8000) >> 15;
89
90 return result;
91 }
92
93 /* Wrapper around fix16_mul to add saturating arithmetic. */
fix16_smul(fix16_t inArg0,fix16_t inArg1)94 FIXMATH_FUNC_ATTRS fix16_t fix16_smul(fix16_t inArg0, fix16_t inArg1)
95 {
96 fix16_t result = fix16_mul(inArg0, inArg1);
97
98 if (result == FIX16_OVERFLOW)
99 {
100 if ((inArg0 >= 0) == (inArg1 >= 0))
101 return FIX16_MAX;
102 else
103 return FIX16_MIN;
104 }
105
106 return result;
107 }
108
109 /* 32-bit implementation of fix16_div. Fastest version for e.g. ARM Cortex M3.
110 * Performs 32-bit divisions repeatedly to reduce the remainder. For this to
111 * be efficient, the processor has to have 32-bit hardware division.
112 */
113 #ifdef __GNUC__
114 // Count leading zeros, using processor-specific instruction if available.
115 #define clz(x) (__builtin_clzl(x) - (8 * sizeof(long) - 32))
116 #else
clz(uint32_t x)117 static uint8_t clz(uint32_t x)
118 {
119 uint8_t result = 0;
120 if (x == 0) return 32;
121 while (!(x & 0xF0000000)) { result += 4; x <<= 4; }
122 while (!(x & 0x80000000)) { result += 1; x <<= 1; }
123 return result;
124 }
125 #endif
126
fix16_div(fix16_t a,fix16_t b)127 FIXMATH_FUNC_ATTRS fix16_t fix16_div(fix16_t a, fix16_t b)
128 {
129 // This uses a hardware 32/32 bit division multiple times, until we have
130 // computed all the bits in (a<<17)/b. Usually this takes 1-3 iterations.
131
132 if (b == 0)
133 return FIX16_MIN;
134
135 uint32_t remainder = (a >= 0) ? a : (-a);
136 uint32_t divider = (b >= 0) ? b : (-b);
137 uint32_t quotient = 0;
138 int bit_pos = 17;
139
140 // Kick-start the division a bit.
141 // This improves speed in the worst-case scenarios where N and D are large
142 // It gets a lower estimate for the result by N/(D >> 17 + 1).
143 if (divider & 0xFFF00000)
144 {
145 uint32_t shifted_div = ((divider >> 17) + 1);
146 quotient = divideu32(remainder, shifted_div);
147 remainder -= ((uint64_t)quotient * divider) >> 17;
148 }
149
150 // If the divider is divisible by 2^n, take advantage of it.
151 while (!(divider & 0xF) && bit_pos >= 4)
152 {
153 divider >>= 4;
154 bit_pos -= 4;
155 }
156
157 while (remainder && bit_pos >= 0)
158 {
159 // Shift remainder as much as we can without overflowing
160 int shift = clz(remainder);
161 if (shift > bit_pos) shift = bit_pos;
162 remainder <<= shift;
163 bit_pos -= shift;
164
165 uint32_t div = divideu32(remainder, divider);
166 remainder = remainder % divider;
167 quotient += div << bit_pos;
168
169 if (div & ~(0xFFFFFFFF >> bit_pos))
170 return FIX16_OVERFLOW;
171
172 remainder <<= 1;
173 bit_pos--;
174 }
175
176 // Quotient is always positive so rounding is easy
177 quotient++;
178
179 fix16_t result = quotient >> 1;
180
181 // Figure out the sign of the result
182 if ((a ^ b) & 0x80000000)
183 {
184 if (result == FIX16_MIN)
185 return FIX16_OVERFLOW;
186
187 result = -result;
188 }
189
190 return result;
191 }
192
193 /* Wrapper around fix16_div to add saturating arithmetic. */
fix16_sdiv(fix16_t inArg0,fix16_t inArg1)194 FIXMATH_FUNC_ATTRS fix16_t fix16_sdiv(fix16_t inArg0, fix16_t inArg1)
195 {
196 fix16_t result = fix16_div(inArg0, inArg1);
197
198 if (result == FIX16_OVERFLOW)
199 {
200 if ((inArg0 >= 0) == (inArg1 >= 0))
201 return FIX16_MAX;
202 else
203 return FIX16_MIN;
204 }
205
206 return result;
207 }
208
fix16_lerp8(fix16_t inArg0,fix16_t inArg1,uint8_t inFract)209 FIXMATH_FUNC_ATTRS fix16_t fix16_lerp8(fix16_t inArg0, fix16_t inArg1, uint8_t inFract)
210 {
211 int64_t tempOut = int64_mul_i32_i32(inArg0, ((1 << 8) - inFract));
212 tempOut = int64_add(tempOut, int64_mul_i32_i32(inArg1, inFract));
213 tempOut = int64_shift(tempOut, -8);
214 return (fix16_t)int64_lo(tempOut);
215 }
216
fix16_lerp16(fix16_t inArg0,fix16_t inArg1,uint16_t inFract)217 FIXMATH_FUNC_ATTRS fix16_t fix16_lerp16(fix16_t inArg0, fix16_t inArg1, uint16_t inFract)
218 {
219 int64_t tempOut = int64_mul_i32_i32(inArg0, (((int32_t)1 << 16) - inFract));
220 tempOut = int64_add(tempOut, int64_mul_i32_i32(inArg1, inFract));
221 tempOut = int64_shift(tempOut, -16);
222 return (fix16_t)int64_lo(tempOut);
223 }
224
fix16_lerp32(fix16_t inArg0,fix16_t inArg1,uint32_t inFract)225 FIXMATH_FUNC_ATTRS fix16_t fix16_lerp32(fix16_t inArg0, fix16_t inArg1, uint32_t inFract)
226 {
227 int64_t tempOut;
228 tempOut = ((int64_t)inArg0 * (0 - inFract));
229 tempOut += ((int64_t)inArg1 * inFract);
230 tempOut >>= 32;
231 return (fix16_t)tempOut;
232 }
233
234 static const uint32_t scales[8] = {
235 /* 5 decimals is enough for full fix16_t precision */
236 1, 10, 100, 1000, 10000, 100000, 100000, 100000
237 };
238
itoa_loop(char * buf,uint32_t scale,uint32_t value,bool skip)239 static char *itoa_loop(char *buf, uint32_t scale, uint32_t value, bool skip)
240 {
241 while (scale)
242 {
243 unsigned digit = (value / scale);
244
245 if (!skip || digit || scale == 1)
246 {
247 skip = false;
248 *buf++ = '0' + digit;
249 value %= scale;
250 }
251
252 scale /= 10;
253 }
254 return buf;
255 }
256
fix16_to_str(fix16_t value,char * buf,int decimals)257 void fix16_to_str(fix16_t value, char *buf, int decimals)
258 {
259 uint32_t uvalue = (value >= 0) ? value : -value;
260 if (value < 0)
261 *buf++ = '-';
262
263 /* Separate the integer and decimal parts of the value */
264 unsigned intpart = uvalue >> 16;
265 uint32_t fracpart = uvalue & 0xFFFF;
266 uint32_t scale = scales[decimals & 7];
267 fracpart = fix16_mul(fracpart, scale);
268
269 if (fracpart >= scale)
270 {
271 /* Handle carry from decimal part */
272 intpart++;
273 fracpart -= scale;
274 }
275
276 /* Format integer part */
277 buf = itoa_loop(buf, 10000, intpart, true);
278
279 /* Format decimal part (if any) */
280 if (scale != 1)
281 {
282 *buf++ = '.';
283 buf = itoa_loop(buf, scale / 10, fracpart, false);
284 }
285
286 *buf = '\0';
287 }
288
fix16_from_str(const char * buf)289 fix16_t fix16_from_str(const char *buf)
290 {
291 while (isspace(*buf))
292 buf++;
293
294 /* Decode the sign */
295 bool negative = (*buf == '-');
296 if (*buf == '+' || *buf == '-')
297 buf++;
298
299 /* Decode the integer part */
300 uint32_t intpart = 0;
301 int count = 0;
302 while (isdigit(*buf))
303 {
304 intpart *= 10;
305 intpart += *buf++ - '0';
306 count++;
307 }
308
309 if (count == 0 || count > 5
310 || intpart > 32768 || (!negative && intpart > 32767))
311 return FIX16_OVERFLOW;
312
313 fix16_t value = intpart << 16;
314
315 /* Decode the decimal part */
316 if (*buf == '.' || *buf == ',')
317 {
318 buf++;
319
320 uint32_t fracpart = 0;
321 uint32_t scale = 1;
322 while (isdigit(*buf) && scale < 100000)
323 {
324 scale *= 10;
325 fracpart *= 10;
326 fracpart += *buf++ - '0';
327 }
328
329 value += fix16_div(fracpart, scale);
330 }
331
332 /* Verify that there is no garbage left over */
333 while (*buf != '\0')
334 {
335 if (!isdigit(*buf) && !isspace(*buf))
336 return FIX16_OVERFLOW;
337
338 buf++;
339 }
340
341 return negative ? -value : value;
342 }
343