1 #include "fix16.h"
2 
3 #include "pragmas.h"
4 #include "fix16_int64.h"
5 
6 /* Subtraction and addition with overflow detection.
7  * The versions without overflow detection are inlined in the header.
8  */
fix16_add(fix16_t a,fix16_t b)9 FIXMATH_FUNC_ATTRS fix16_t fix16_add(fix16_t a, fix16_t b)
10 {
11     // Use unsigned integers because overflow with signed integers is
12     // an undefined operation (http://www.airs.com/blog/archives/120).
13     uint32_t _a = a, _b = b;
14     uint32_t sum = _a + _b;
15 
16     // Overflow can only happen if sign of a == sign of b, and then
17     // it causes sign of sum != sign of a.
18     if (!((_a ^ _b) & 0x80000000) && ((_a ^ sum) & 0x80000000))
19         return FIX16_OVERFLOW;
20 
21     return sum;
22 }
23 
fix16_sub(fix16_t a,fix16_t b)24 FIXMATH_FUNC_ATTRS fix16_t fix16_sub(fix16_t a, fix16_t b)
25 {
26     uint32_t _a = a, _b = b;
27     uint32_t diff = _a - _b;
28 
29     // Overflow can only happen if sign of a != sign of b, and then
30     // it causes sign of diff != sign of a.
31     if (((_a ^ _b) & 0x80000000) && ((_a ^ diff) & 0x80000000))
32         return FIX16_OVERFLOW;
33 
34     return diff;
35 }
36 
37 /* Saturating arithmetic */
fix16_sadd(fix16_t a,fix16_t b)38 FIXMATH_FUNC_ATTRS fix16_t fix16_sadd(fix16_t a, fix16_t b)
39 {
40     fix16_t result = fix16_add(a, b);
41 
42     if (result == FIX16_OVERFLOW)
43         return (a >= 0) ? FIX16_MAX : FIX16_MIN;
44 
45     return result;
46 }
47 
fix16_ssub(fix16_t a,fix16_t b)48 FIXMATH_FUNC_ATTRS fix16_t fix16_ssub(fix16_t a, fix16_t b)
49 {
50     fix16_t result = fix16_sub(a, b);
51 
52     if (result == FIX16_OVERFLOW)
53         return (a >= 0) ? FIX16_MAX : FIX16_MIN;
54 
55     return result;
56 }
57 
58 
59 
60 /* 64-bit implementation for fix16_mul. Fastest version for e.g. ARM Cortex M3.
61  * Performs a 32*32 -> 64bit multiplication. The middle 32 bits are the result,
62  * bottom 16 bits are used for rounding, and upper 16 bits are used for overflow
63  * detection.
64  */
65 
fix16_mul(fix16_t inArg0,fix16_t inArg1)66 FIXMATH_FUNC_ATTRS fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1)
67 {
68     int64_t product = (int64_t)inArg0 * inArg1;
69 
70     // The upper 17 bits should all be the same (the sign).
71     uint32_t upper = (product >> 47);
72 
73     if (product < 0)
74     {
75         if (~upper)
76                 return FIX16_OVERFLOW;
77 
78         // This adjustment is required in order to round -1/2 correctly
79         product--;
80     }
81     else
82     {
83         if (upper)
84                 return FIX16_OVERFLOW;
85     }
86 
87     fix16_t result = product >> 16;
88     result += (product & 0x8000) >> 15;
89 
90     return result;
91 }
92 
93 /* Wrapper around fix16_mul to add saturating arithmetic. */
fix16_smul(fix16_t inArg0,fix16_t inArg1)94 FIXMATH_FUNC_ATTRS fix16_t fix16_smul(fix16_t inArg0, fix16_t inArg1)
95 {
96     fix16_t result = fix16_mul(inArg0, inArg1);
97 
98     if (result == FIX16_OVERFLOW)
99     {
100         if ((inArg0 >= 0) == (inArg1 >= 0))
101             return FIX16_MAX;
102         else
103             return FIX16_MIN;
104     }
105 
106     return result;
107 }
108 
109 /* 32-bit implementation of fix16_div. Fastest version for e.g. ARM Cortex M3.
110  * Performs 32-bit divisions repeatedly to reduce the remainder. For this to
111  * be efficient, the processor has to have 32-bit hardware division.
112  */
113 #ifdef __GNUC__
114 // Count leading zeros, using processor-specific instruction if available.
115 #define clz(x) (__builtin_clzl(x) - (8 * sizeof(long) - 32))
116 #else
clz(uint32_t x)117 static uint8_t clz(uint32_t x)
118 {
119     uint8_t result = 0;
120     if (x == 0) return 32;
121     while (!(x & 0xF0000000)) { result += 4; x <<= 4; }
122     while (!(x & 0x80000000)) { result += 1; x <<= 1; }
123     return result;
124 }
125 #endif
126 
fix16_div(fix16_t a,fix16_t b)127 FIXMATH_FUNC_ATTRS fix16_t fix16_div(fix16_t a, fix16_t b)
128 {
129     // This uses a hardware 32/32 bit division multiple times, until we have
130     // computed all the bits in (a<<17)/b. Usually this takes 1-3 iterations.
131 
132     if (b == 0)
133             return FIX16_MIN;
134 
135     uint32_t remainder = (a >= 0) ? a : (-a);
136     uint32_t divider = (b >= 0) ? b : (-b);
137     uint32_t quotient = 0;
138     int bit_pos = 17;
139 
140     // Kick-start the division a bit.
141     // This improves speed in the worst-case scenarios where N and D are large
142     // It gets a lower estimate for the result by N/(D >> 17 + 1).
143     if (divider & 0xFFF00000)
144     {
145         uint32_t shifted_div = ((divider >> 17) + 1);
146         quotient = divideu32(remainder, shifted_div);
147         remainder -= ((uint64_t)quotient * divider) >> 17;
148     }
149 
150     // If the divider is divisible by 2^n, take advantage of it.
151     while (!(divider & 0xF) && bit_pos >= 4)
152     {
153         divider >>= 4;
154         bit_pos -= 4;
155     }
156 
157     while (remainder && bit_pos >= 0)
158     {
159         // Shift remainder as much as we can without overflowing
160         int shift = clz(remainder);
161         if (shift > bit_pos) shift = bit_pos;
162         remainder <<= shift;
163         bit_pos -= shift;
164 
165         uint32_t div = divideu32(remainder, divider);
166         remainder = remainder % divider;
167         quotient += div << bit_pos;
168 
169         if (div & ~(0xFFFFFFFF >> bit_pos))
170                 return FIX16_OVERFLOW;
171 
172         remainder <<= 1;
173         bit_pos--;
174     }
175 
176     // Quotient is always positive so rounding is easy
177     quotient++;
178 
179     fix16_t result = quotient >> 1;
180 
181     // Figure out the sign of the result
182     if ((a ^ b) & 0x80000000)
183     {
184         if (result == FIX16_MIN)
185                 return FIX16_OVERFLOW;
186 
187         result = -result;
188     }
189 
190     return result;
191 }
192 
193 /* Wrapper around fix16_div to add saturating arithmetic. */
fix16_sdiv(fix16_t inArg0,fix16_t inArg1)194 FIXMATH_FUNC_ATTRS fix16_t fix16_sdiv(fix16_t inArg0, fix16_t inArg1)
195 {
196     fix16_t result = fix16_div(inArg0, inArg1);
197 
198     if (result == FIX16_OVERFLOW)
199     {
200         if ((inArg0 >= 0) == (inArg1 >= 0))
201             return FIX16_MAX;
202         else
203             return FIX16_MIN;
204     }
205 
206     return result;
207 }
208 
fix16_lerp8(fix16_t inArg0,fix16_t inArg1,uint8_t inFract)209 FIXMATH_FUNC_ATTRS fix16_t fix16_lerp8(fix16_t inArg0, fix16_t inArg1, uint8_t inFract)
210 {
211     int64_t tempOut = int64_mul_i32_i32(inArg0, ((1 << 8) - inFract));
212     tempOut = int64_add(tempOut, int64_mul_i32_i32(inArg1, inFract));
213     tempOut = int64_shift(tempOut, -8);
214     return (fix16_t)int64_lo(tempOut);
215 }
216 
fix16_lerp16(fix16_t inArg0,fix16_t inArg1,uint16_t inFract)217 FIXMATH_FUNC_ATTRS fix16_t fix16_lerp16(fix16_t inArg0, fix16_t inArg1, uint16_t inFract)
218 {
219     int64_t tempOut = int64_mul_i32_i32(inArg0, (((int32_t)1 << 16) - inFract));
220     tempOut = int64_add(tempOut, int64_mul_i32_i32(inArg1, inFract));
221     tempOut = int64_shift(tempOut, -16);
222     return (fix16_t)int64_lo(tempOut);
223 }
224 
fix16_lerp32(fix16_t inArg0,fix16_t inArg1,uint32_t inFract)225 FIXMATH_FUNC_ATTRS fix16_t fix16_lerp32(fix16_t inArg0, fix16_t inArg1, uint32_t inFract)
226 {
227     int64_t tempOut;
228     tempOut  = ((int64_t)inArg0 * (0 - inFract));
229     tempOut	+= ((int64_t)inArg1 * inFract);
230     tempOut >>= 32;
231     return (fix16_t)tempOut;
232 }
233 
234 static const uint32_t scales[8] = {
235     /* 5 decimals is enough for full fix16_t precision */
236     1, 10, 100, 1000, 10000, 100000, 100000, 100000
237 };
238 
itoa_loop(char * buf,uint32_t scale,uint32_t value,bool skip)239 static char *itoa_loop(char *buf, uint32_t scale, uint32_t value, bool skip)
240 {
241     while (scale)
242     {
243         unsigned digit = (value / scale);
244 
245         if (!skip || digit || scale == 1)
246         {
247             skip = false;
248             *buf++ = '0' + digit;
249             value %= scale;
250         }
251 
252         scale /= 10;
253     }
254     return buf;
255 }
256 
fix16_to_str(fix16_t value,char * buf,int decimals)257 void fix16_to_str(fix16_t value, char *buf, int decimals)
258 {
259     uint32_t uvalue = (value >= 0) ? value : -value;
260     if (value < 0)
261         *buf++ = '-';
262 
263     /* Separate the integer and decimal parts of the value */
264     unsigned intpart = uvalue >> 16;
265     uint32_t fracpart = uvalue & 0xFFFF;
266     uint32_t scale = scales[decimals & 7];
267     fracpart = fix16_mul(fracpart, scale);
268 
269     if (fracpart >= scale)
270     {
271         /* Handle carry from decimal part */
272         intpart++;
273         fracpart -= scale;
274     }
275 
276     /* Format integer part */
277     buf = itoa_loop(buf, 10000, intpart, true);
278 
279     /* Format decimal part (if any) */
280     if (scale != 1)
281     {
282         *buf++ = '.';
283         buf = itoa_loop(buf, scale / 10, fracpart, false);
284     }
285 
286     *buf = '\0';
287 }
288 
fix16_from_str(const char * buf)289 fix16_t fix16_from_str(const char *buf)
290 {
291     while (isspace(*buf))
292         buf++;
293 
294     /* Decode the sign */
295     bool negative = (*buf == '-');
296     if (*buf == '+' || *buf == '-')
297         buf++;
298 
299     /* Decode the integer part */
300     uint32_t intpart = 0;
301     int count = 0;
302     while (isdigit(*buf))
303     {
304         intpart *= 10;
305         intpart += *buf++ - '0';
306         count++;
307     }
308 
309     if (count == 0 || count > 5
310         || intpart > 32768 || (!negative && intpart > 32767))
311         return FIX16_OVERFLOW;
312 
313     fix16_t value = intpart << 16;
314 
315     /* Decode the decimal part */
316     if (*buf == '.' || *buf == ',')
317     {
318         buf++;
319 
320         uint32_t fracpart = 0;
321         uint32_t scale = 1;
322         while (isdigit(*buf) && scale < 100000)
323         {
324             scale *= 10;
325             fracpart *= 10;
326             fracpart += *buf++ - '0';
327         }
328 
329         value += fix16_div(fracpart, scale);
330     }
331 
332     /* Verify that there is no garbage left over */
333     while (*buf != '\0')
334     {
335         if (!isdigit(*buf) && !isspace(*buf))
336             return FIX16_OVERFLOW;
337 
338         buf++;
339     }
340 
341     return negative ? -value : value;
342 }
343