1 /*
2 * When dividing by a known compile time constant, the division can be replaced
3 * by a multiply+shift operation. GCC will do this automatically,
4 * *BUT ONLY FOR DIVISION OF REGISTER-WIDTH OR NARROWER*.
5 *
6 * So on an 8-bit system, 16-bit divides will *NOT* be optimised.
7 *
8 * The macros here manually apply the multiply+shift operation for 16-bit numbers.
9 *
10 * Testing on an AtMega2560, -O3 optimizations:
11 *   Performance improvement of 85% to 90%+ speed up (division by non-powers of 2)
12 *   Zero increase in RAM usage
13 *   Average of 25 bytes Flash used per call site
14 *     Be careful calling this in a loop with aggressive loop unrolling!
15 *
16 * Note: testing of the multiply+shift technique on 8-bit division showed a
17 * slight slow down over native code on AtMega2560. So the 8 bit equivalent
18 * macros have not been included
19 */
20 
21 #pragma once
22 #include "libdivide.h"
23 #include "u16_ldparams.h"
24 #include "s16_ldparams.h"
25 
26 #define CAT_HELPER(a, b) a ## b
27 #define CONCAT(A, B) CAT_HELPER(A, B)
28 
29 // GCC will optimise division by a power of 2
30 // So allow that.
31 #define S16_ISPOW2_NEG(denom) \
32  (denom==-2 || \
33   denom==-4 || \
34   denom==-8 || \
35   denom==-16 || \
36   denom==-32 || \
37   denom==-64 || \
38   denom==-128 || \
39   denom==-256 || \
40   denom==-512 || \
41   denom==-1024 || \
42   denom==-2048 || \
43   denom==-4096 || \
44   denom==-8192 || \
45   denom==-16384)
46 #define S16_ISPOW2_POS(denom) \
47  (denom==2 || \
48   denom==4 || \
49   denom==8 || \
50   denom==16 || \
51   denom==32 || \
52   denom==64 || \
53   denom==128 || \
54   denom==256 || \
55   denom==512 || \
56   denom==1024 || \
57   denom==2048 || \
58   denom==4096 || \
59   denom==8192 || \
60   denom==16384)
61 #define U16_ISPOW2(denom) (S16_ISPOW2_POS(denom) || denom==32768)
62 #define S16_ISPOW2(denom) (S16_ISPOW2_POS(denom) || S16_ISPOW2_NEG(denom))
63 
64 // Apply the libdivide namespace if necessary
65 #ifdef __cplusplus
66 #define LIB_DIV_NAMESPACE libdivide::
67 #else
68 #define LIB_DIV_NAMESPACE
69 #endif
70 
71 /*
72 * Wrapper for *unsigned* 16-bit DIVISION. The divisor must be a compile time
73 * constant.
74 * E.g. FAST_DIV16U(value, 100)
75 */
76 #define U16_MAGIC(d) CONCAT(CONCAT(U16LD_DENOM_, d), _MAGIC)
77 #define U16_MORE(d) CONCAT(CONCAT(U16LD_DENOM_, d), _MORE)
78 #define FAST_DIV16U(a, d) (U16_ISPOW2(d) ? a/d : LIB_DIV_NAMESPACE libdivide_u16_do_raw(a, U16_MAGIC(d), U16_MORE(d)))
79 
80 /*
81 * Wrapper for *signed* 16-bit DIVISION by a *POSITIVE* compile time constant.
82 * E.g. FAST_DIV16(-value, 777)
83 *
84 * This only works for positive parmeters :-(
85 * A negative number results in a hypen in the macro name, which is not allowed
86 */
87 #define S16_MAGIC(d) CONCAT(CONCAT(S16LD_DENOM_, d), _MAGIC)
88 #define S16_MORE(d) CONCAT(CONCAT(S16LD_DENOM_, d), _MORE)
89 #define FAST_DIV16(a, d) (S16_ISPOW2(d) ? a/d : LIB_DIV_NAMESPACE libdivide_s16_do_raw(a, S16_MAGIC(d), S16_MORE(d)))
90 
91 /*
92 * Wrapper for *signed* 16-bit DIVISION by a *NEGATIVE* compile time constant.
93 * E.g. FAST_DIV16_NEG(-value, 777) // <-- It's converted to negative. Really.
94 *
95 * This only works for positive parmeters :-(
96 * A negative number results in a hypen in the macro name, which is not allowed
97 */
98 #define S16_MAGIC_NEG(d) CONCAT(CONCAT(S16LD_DENOM_MINUS_, d), _MAGIC)
99 #define S16_MORE_NEG(d) CONCAT(CONCAT(S16LD_DENOM_MINUS_, d), _MORE)
100 #define FAST_DIV16_NEG(a, d) (S16_ISPOW2(d) ? a/-d : LIB_DIV_NAMESPACE libdivide_s16_do_raw(a, S16_MAGIC_NEG(d), S16_MORE_NEG(d)))
101 
102 /*
103 * Wrapper for *unsigned* 16-bit MODULUS. The divisor must be a compile time
104 * constant.
105 * E.g. FAST_MOD16U(value, 6)
106 */
107 #define FAST_MOD16U(a, d) (a - (FAST_DIV16U(a, d) * d))
108