1 #ifndef HALIDE_RDOM_H 2 #define HALIDE_RDOM_H 3 4 /** \file 5 * Defines the front-end syntax for reduction domains and reduction 6 * variables. 7 */ 8 9 #include <iostream> 10 #include <string> 11 #include <utility> 12 #include <vector> 13 14 #include "Expr.h" 15 #include "Reduction.h" 16 #include "Util.h" 17 18 namespace Halide { 19 20 template<typename T> 21 class Buffer; 22 class OutputImageParam; 23 24 /** A reduction variable represents a single dimension of a reduction 25 * domain (RDom). Don't construct them directly, instead construct an 26 * RDom, and use RDom::operator[] to get at the variables. For 27 * single-dimensional reduction domains, you can just cast a 28 * single-dimensional RDom to an RVar. */ 29 class RVar { 30 std::string _name; 31 Internal::ReductionDomain _domain; 32 int _index; 33 _var()34 const Internal::ReductionVariable &_var() const { 35 return _domain.domain().at(_index); 36 } 37 38 public: 39 /** An empty reduction variable. */ RVar()40 RVar() 41 : _name(Internal::make_entity_name(this, "Halide:.*:RVar", 'r')) { 42 } 43 44 /** Construct an RVar with the given name */ RVar(const std::string & n)45 explicit RVar(const std::string &n) 46 : _name(n) { 47 } 48 49 /** Construct a reduction variable with the given name and 50 * bounds. Must be a member of the given reduction domain. */ RVar(Internal::ReductionDomain domain,int index)51 RVar(Internal::ReductionDomain domain, int index) 52 : _domain(std::move(domain)), _index(index) { 53 } 54 55 /** The minimum value that this variable will take on */ 56 Expr min() const; 57 58 /** The number that this variable will take on. The maximum value 59 * of this variable will be min() + extent() - 1 */ 60 Expr extent() const; 61 62 /** The reduction domain this is associated with. */ domain()63 Internal::ReductionDomain domain() const { 64 return _domain; 65 } 66 67 /** The name of this reduction variable */ 68 const std::string &name() const; 69 70 /** Reduction variables can be used as expressions. */ 71 operator Expr() const; 72 }; 73 74 /** A multi-dimensional domain over which to iterate. Used when 75 * defining functions with update definitions. 76 * 77 * An reduction is a function with a two-part definition. It has an 78 * initial value, which looks much like a pure function, and an update 79 * definition, which may refer to some RDom. Evaluating such a 80 * function first initializes it over the required domain (which is 81 * inferred based on usage), and then runs update rule for all points 82 * in the RDom. For example: 83 * 84 \code 85 Func f; 86 Var x; 87 RDom r(0, 10); 88 f(x) = x; // the initial value 89 f(r) = f(r) * 2; 90 Buffer<int> result = f.realize(10); 91 \endcode 92 * 93 * This function creates a single-dimensional buffer of size 10, in 94 * which element x contains the value x*2. Internally, first the 95 * initialization rule fills in x at every site, and then the update 96 * definition doubles every site. 97 * 98 * One use of reductions is to build a function recursively (pure 99 * functions in halide cannot be recursive). For example, this 100 * function fills in an array with the first 20 fibonacci numbers: 101 * 102 \code 103 Func f; 104 Var x; 105 RDom r(2, 18); 106 f(x) = 1; 107 f(r) = f(r-1) + f(r-2); 108 \endcode 109 * 110 * Another use of reductions is to perform scattering operations, as 111 * unlike a pure function declaration, the left-hand-side of an update 112 * definition may contain general expressions: 113 * 114 \code 115 ImageParam input(UInt(8), 2); 116 Func histogram; 117 Var x; 118 RDom r(input); // Iterate over all pixels in the input 119 histogram(x) = 0; 120 histogram(input(r.x, r.y)) = histogram(input(r.x, r.y)) + 1; 121 \endcode 122 * 123 * An update definition may also be multi-dimensional. This example 124 * computes a summed-area table by first summing horizontally and then 125 * vertically: 126 * 127 \code 128 ImageParam input(Float(32), 2); 129 Func sum_x, sum_y; 130 Var x, y; 131 RDom r(input); 132 sum_x(x, y) = input(x, y); 133 sum_x(r.x, r.y) = sum_x(r.x, r.y) + sum_x(r.x-1, r.y); 134 sum_y(x, y) = sum_x(x, y); 135 sum_y(r.x, r.y) = sum_y(r.x, r.y) + sum_y(r.x, r.y-1); 136 \endcode 137 * 138 * You can also mix pure dimensions with reduction variables. In the 139 * previous example, note that there's no need for the y coordinate in 140 * sum_x to be traversed serially. The sum within each row is entirely 141 * independent. The rows could be computed in parallel, or in a 142 * different order, without changing the meaning. Therefore, we can 143 * instead write this definition as follows: 144 * 145 \code 146 ImageParam input(Float(32), 2); 147 Func sum_x, sum_y; 148 Var x, y; 149 RDom r(input); 150 sum_x(x, y) = input(x, y); 151 sum_x(r.x, y) = sum_x(r.x, y) + sum_x(r.x-1, y); 152 sum_y(x, y) = sum_x(x, y); 153 sum_y(x, r.y) = sum_y(x, r.y) + sum_y(x, r.y-1); 154 \endcode 155 * 156 * This lets us schedule it more flexibly. You can now parallelize the 157 * update step of sum_x over y by calling: 158 \code 159 sum_x.update().parallel(y). 160 \endcode 161 * 162 * Note that calling sum_x.parallel(y) only parallelizes the 163 * initialization step, and not the update step! Scheduling the update 164 * step of a reduction must be done using the handle returned by 165 * \ref Func::update(). This code parallelizes both the initialization 166 * step and the update step: 167 * 168 \code 169 sum_x.parallel(y); 170 sum_x.update().parallel(y); 171 \endcode 172 * 173 * When you mix reduction variables and pure dimensions, the reduction 174 * domain is traversed outermost. That is, for each point in the 175 * reduction domain, the inferred pure domain is traversed in its 176 * entirety. For the above example, this means that sum_x walks down 177 * the columns, and sum_y walks along the rows. This may not be 178 * cache-coherent. You may try reordering these dimensions using the 179 * schedule, but Halide will return an error if it decides that this 180 * risks changing the meaning of your function. The solution lies in 181 * clever scheduling. If we say: 182 * 183 \code 184 sum_x.compute_at(sum_y, y); 185 \endcode 186 * 187 * Then the sum in x is computed only as necessary for each scanline 188 * of the sum in y. This not only results in sum_x walking along the 189 * rows, it also improves the locality of the entire pipeline. 190 */ 191 class RDom { 192 Internal::ReductionDomain dom; 193 194 void init_vars(const std::string &name); 195 196 void initialize_from_region(const Region ®ion, std::string name = ""); 197 198 template<typename... Args> initialize_from_region(Region & region,const Expr & min,const Expr & extent,Args &&...args)199 HALIDE_NO_USER_CODE_INLINE void initialize_from_region(Region ®ion, const Expr &min, const Expr &extent, Args &&... args) { 200 region.push_back({min, extent}); 201 initialize_from_region(region, std::forward<Args>(args)...); 202 } 203 204 public: 205 /** Construct an undefined reduction domain. */ 206 RDom() = default; 207 208 /** Construct a multi-dimensional reduction domain with the given name. If the name 209 * is left blank, a unique one is auto-generated. */ 210 // @{ 211 HALIDE_NO_USER_CODE_INLINE RDom(const Region ®ion, std::string name = "") { 212 initialize_from_region(region, std::move(name)); 213 } 214 215 template<typename... Args> RDom(Expr min,Expr extent,Args &&...args)216 HALIDE_NO_USER_CODE_INLINE RDom(Expr min, Expr extent, Args &&... args) { 217 // This should really just be a delegating constructor, but I couldn't make 218 // that work with variadic template unpacking in visual studio 2013 219 Region region; 220 initialize_from_region(region, min, extent, std::forward<Args>(args)...); 221 } 222 // @} 223 224 /** Construct a reduction domain that iterates over all points in 225 * a given Buffer or ImageParam. Has the same dimensionality as 226 * the argument. */ 227 // @{ 228 RDom(const Buffer<void> &); 229 RDom(const OutputImageParam &); 230 template<typename T> RDom(const Buffer<T> & im)231 HALIDE_NO_USER_CODE_INLINE RDom(const Buffer<T> &im) 232 : RDom(Buffer<void>(im)) { 233 } 234 // @} 235 236 /** Construct a reduction domain that wraps an Internal ReductionDomain object. */ 237 RDom(const Internal::ReductionDomain &d); 238 239 /** Get at the internal reduction domain object that this wraps. */ domain()240 Internal::ReductionDomain domain() const { 241 return dom; 242 } 243 244 /** Check if this reduction domain is non-null */ defined()245 bool defined() const { 246 return dom.defined(); 247 } 248 249 /** Compare two reduction domains for equality of reference */ same_as(const RDom & other)250 bool same_as(const RDom &other) const { 251 return dom.same_as(other.dom); 252 } 253 254 /** Get the dimensionality of a reduction domain */ 255 int dimensions() const; 256 257 /** Get at one of the dimensions of the reduction domain */ 258 RVar operator[](int) const; 259 260 /** Single-dimensional reduction domains can be used as RVars directly. */ 261 operator RVar() const; 262 263 /** Single-dimensional reduction domains can be also be used as Exprs directly. */ 264 operator Expr() const; 265 266 /** Add a predicate to the RDom. An RDom may have multiple 267 * predicates associated with it. An update definition that uses 268 * an RDom only iterates over the subset points in the domain for 269 * which all of its predicates are true. The predicate expression 270 * obeys the same rules as the expressions used on the 271 * right-hand-side of the corresponding update definition. It may 272 * refer to the RDom's variables and free variables in the Func's 273 * update definition. It may include calls to other Funcs, or make 274 * recursive calls to the same Func. This permits iteration over 275 * non-rectangular domains, or domains with sizes that vary with 276 * some free variable, or domains with shapes determined by some 277 * other Func. 278 * 279 * Note that once RDom is used in the update definition of some 280 * Func, no new predicates can be added to the RDom. 281 * 282 * Consider a simple example: 283 \code 284 RDom r(0, 20, 0, 20); 285 r.where(r.x < r.y); 286 r.where(r.x == 10); 287 r.where(r.y > 13); 288 f(r.x, r.y) += 1; 289 \endcode 290 * This is equivalent to: 291 \code 292 for (int r.y = 0; r.y < 20; r.y++) { 293 if (r.y > 13) { 294 for (int r.x = 0; r.x < 20; r.x++) { 295 if (r.x == 10) { 296 if (r.x < r.y) { 297 f[r.x, r.y] += 1; 298 } 299 } 300 } 301 } 302 } 303 \endcode 304 * 305 * Where possible Halide restricts the range of the containing for 306 * loops to avoid the cases where the predicate is false so that 307 * the if statement can be removed entirely. The case above would 308 * be further simplified into: 309 * 310 \code 311 for (int r.y = 14; r.y < 20; r.y++) { 312 f[r.x, r.y] += 1; 313 } 314 \endcode 315 * 316 * In general, the predicates that we can simplify away by 317 * restricting loop ranges are inequalities that compare an inner 318 * Var or RVar to some expression in outer Vars or RVars. 319 * 320 * You can also pack multiple conditions into one predicate like so: 321 * 322 \code 323 RDom r(0, 20, 0, 20); 324 r.where((r.x < r.y) && (r.x == 10) && (r.y > 13)); 325 f(r.x, r.y) += 1; 326 \endcode 327 * 328 */ 329 void where(Expr predicate); 330 331 /** Direct access to the first four dimensions of the reduction 332 * domain. Some of these variables may be undefined if the 333 * reduction domain has fewer than four dimensions. */ 334 // @{ 335 RVar x, y, z, w; 336 // @} 337 }; 338 339 /** Emit an RVar in a human-readable form */ 340 std::ostream &operator<<(std::ostream &stream, const RVar &); 341 342 /** Emit an RDom in a human-readable form. */ 343 std::ostream &operator<<(std::ostream &stream, const RDom &); 344 } // namespace Halide 345 346 #endif 347