1 /* 2 * When dividing by a known compile time constant, the division can be replaced 3 * by a multiply+shift operation. GCC will do this automatically, 4 * *BUT ONLY FOR DIVISION OF REGISTER-WIDTH OR NARROWER*. 5 * 6 * So on an 8-bit system, 16-bit divides will *NOT* be optimised. 7 * 8 * The macros here manually apply the multiply+shift operation for 16-bit numbers. 9 * 10 * Testing on an AtMega2560, -O3 optimizations: 11 * Performance improvement of 85% to 90%+ speed up (division by non-powers of 2) 12 * Zero increase in RAM usage 13 * Average of 25 bytes Flash used per call site 14 * Be careful calling this in a loop with aggressive loop unrolling! 15 * 16 * Note: testing of the multiply+shift technique on 8-bit division showed a 17 * slight slow down over native code on AtMega2560. So the 8 bit equivalent 18 * macros have not been included 19 */ 20 21 #pragma once 22 #include "libdivide.h" 23 #include "u16_ldparams.h" 24 #include "s16_ldparams.h" 25 26 #define CAT_HELPER(a, b) a ## b 27 #define CONCAT(A, B) CAT_HELPER(A, B) 28 29 // GCC will optimise division by a power of 2 30 // So allow that. 31 #define S16_ISPOW2_NEG(denom) \ 32 (denom==-2 || \ 33 denom==-4 || \ 34 denom==-8 || \ 35 denom==-16 || \ 36 denom==-32 || \ 37 denom==-64 || \ 38 denom==-128 || \ 39 denom==-256 || \ 40 denom==-512 || \ 41 denom==-1024 || \ 42 denom==-2048 || \ 43 denom==-4096 || \ 44 denom==-8192 || \ 45 denom==-16384) 46 #define S16_ISPOW2_POS(denom) \ 47 (denom==2 || \ 48 denom==4 || \ 49 denom==8 || \ 50 denom==16 || \ 51 denom==32 || \ 52 denom==64 || \ 53 denom==128 || \ 54 denom==256 || \ 55 denom==512 || \ 56 denom==1024 || \ 57 denom==2048 || \ 58 denom==4096 || \ 59 denom==8192 || \ 60 denom==16384) 61 #define U16_ISPOW2(denom) (S16_ISPOW2_POS(denom) || denom==32768) 62 #define S16_ISPOW2(denom) (S16_ISPOW2_POS(denom) || S16_ISPOW2_NEG(denom)) 63 64 // Apply the libdivide namespace if necessary 65 #ifdef __cplusplus 66 #define LIB_DIV_NAMESPACE libdivide:: 67 #else 68 #define LIB_DIV_NAMESPACE 69 #endif 70 71 /* 72 * Wrapper for *unsigned* 16-bit DIVISION. The divisor must be a compile time 73 * constant. 74 * E.g. FAST_DIV16U(value, 100) 75 */ 76 #define U16_MAGIC(d) CONCAT(CONCAT(U16LD_DENOM_, d), _MAGIC) 77 #define U16_MORE(d) CONCAT(CONCAT(U16LD_DENOM_, d), _MORE) 78 #define FAST_DIV16U(a, d) (U16_ISPOW2(d) ? a/d : LIB_DIV_NAMESPACE libdivide_u16_do_raw(a, U16_MAGIC(d), U16_MORE(d))) 79 80 /* 81 * Wrapper for *signed* 16-bit DIVISION by a *POSITIVE* compile time constant. 82 * E.g. FAST_DIV16(-value, 777) 83 * 84 * This only works for positive parmeters :-( 85 * A negative number results in a hypen in the macro name, which is not allowed 86 */ 87 #define S16_MAGIC(d) CONCAT(CONCAT(S16LD_DENOM_, d), _MAGIC) 88 #define S16_MORE(d) CONCAT(CONCAT(S16LD_DENOM_, d), _MORE) 89 #define FAST_DIV16(a, d) (S16_ISPOW2(d) ? a/d : LIB_DIV_NAMESPACE libdivide_s16_do_raw(a, S16_MAGIC(d), S16_MORE(d))) 90 91 /* 92 * Wrapper for *signed* 16-bit DIVISION by a *NEGATIVE* compile time constant. 93 * E.g. FAST_DIV16_NEG(-value, 777) // <-- It's converted to negative. Really. 94 * 95 * This only works for positive parmeters :-( 96 * A negative number results in a hypen in the macro name, which is not allowed 97 */ 98 #define S16_MAGIC_NEG(d) CONCAT(CONCAT(S16LD_DENOM_MINUS_, d), _MAGIC) 99 #define S16_MORE_NEG(d) CONCAT(CONCAT(S16LD_DENOM_MINUS_, d), _MORE) 100 #define FAST_DIV16_NEG(a, d) (S16_ISPOW2(d) ? a/-d : LIB_DIV_NAMESPACE libdivide_s16_do_raw(a, S16_MAGIC_NEG(d), S16_MORE_NEG(d))) 101 102 /* 103 * Wrapper for *unsigned* 16-bit MODULUS. The divisor must be a compile time 104 * constant. 105 * E.g. FAST_MOD16U(value, 6) 106 */ 107 #define FAST_MOD16U(a, d) (a - (FAST_DIV16U(a, d) * d)) 108