1 #ifndef HALIDE_FAST_INTEGER_DIVIDE_H 2 #define HALIDE_FAST_INTEGER_DIVIDE_H 3 4 #include "Buffer.h" 5 #include "Expr.h" 6 7 namespace Halide { 8 9 /** Built-in images used for fast_integer_divide below. Use of 10 * fast_integer_divide will automatically embed the appropriate tables 11 * in your object file. They are declared here in case you want to do 12 * something non-default with them. */ 13 namespace IntegerDivideTable { 14 Buffer<uint8_t> integer_divide_table_u8(); 15 Buffer<uint8_t> integer_divide_table_s8(); 16 Buffer<uint16_t> integer_divide_table_u16(); 17 Buffer<uint16_t> integer_divide_table_s16(); 18 Buffer<uint32_t> integer_divide_table_u32(); 19 Buffer<uint32_t> integer_divide_table_s32(); 20 } // namespace IntegerDivideTable 21 22 /** Integer division by small values can be done exactly as multiplies 23 * and shifts. This function does integer division for numerators of 24 * various integer types (8, 16, 32 bit signed and unsigned) 25 * numerators and uint8 denominators. The type of the result is the 26 * type of the numerator. The unsigned version is faster than the 27 * signed version, so cast the numerator to an unsigned int if you 28 * know it's positive. 29 * 30 * If your divisor is compile-time constant, Halide performs a 31 * slightly better optimization automatically, so there's no need to 32 * use this function (but it won't hurt). 33 * 34 * This function vectorizes well on arm, and well on x86 for 16 and 8 35 * bit vectors. For 32-bit vectors on x86 you're better off using 36 * native integer division. 37 * 38 * Also, this routine treats division by zero as division by 39 * 256. I.e. it interprets the uint8 divisor as a number from 1 to 256 40 * inclusive. 41 */ 42 Expr fast_integer_divide(Expr numerator, Expr denominator); 43 44 /** Use the fast integer division tables to implement a modulo 45 * operation via the Euclidean identity: a%b = a - (a/b)*b 46 */ 47 Expr fast_integer_modulo(Expr numerator, Expr denominator); 48 49 } // namespace Halide 50 51 #endif 52