1 #ifndef HALIDE_RDOM_H
2 #define HALIDE_RDOM_H
3 
4 /** \file
5  * Defines the front-end syntax for reduction domains and reduction
6  * variables.
7  */
8 
9 #include <iostream>
10 #include <string>
11 #include <utility>
12 #include <vector>
13 
14 #include "Expr.h"
15 #include "Reduction.h"
16 #include "Util.h"
17 
18 namespace Halide {
19 
20 template<typename T>
21 class Buffer;
22 class OutputImageParam;
23 
24 /** A reduction variable represents a single dimension of a reduction
25  * domain (RDom). Don't construct them directly, instead construct an
26  * RDom, and use RDom::operator[] to get at the variables. For
27  * single-dimensional reduction domains, you can just cast a
28  * single-dimensional RDom to an RVar. */
29 class RVar {
30     std::string _name;
31     Internal::ReductionDomain _domain;
32     int _index;
33 
_var()34     const Internal::ReductionVariable &_var() const {
35         return _domain.domain().at(_index);
36     }
37 
38 public:
39     /** An empty reduction variable. */
RVar()40     RVar()
41         : _name(Internal::make_entity_name(this, "Halide:.*:RVar", 'r')) {
42     }
43 
44     /** Construct an RVar with the given name */
RVar(const std::string & n)45     explicit RVar(const std::string &n)
46         : _name(n) {
47     }
48 
49     /** Construct a reduction variable with the given name and
50      * bounds. Must be a member of the given reduction domain. */
RVar(Internal::ReductionDomain domain,int index)51     RVar(Internal::ReductionDomain domain, int index)
52         : _domain(std::move(domain)), _index(index) {
53     }
54 
55     /** The minimum value that this variable will take on */
56     Expr min() const;
57 
58     /** The number that this variable will take on. The maximum value
59      * of this variable will be min() + extent() - 1 */
60     Expr extent() const;
61 
62     /** The reduction domain this is associated with. */
domain()63     Internal::ReductionDomain domain() const {
64         return _domain;
65     }
66 
67     /** The name of this reduction variable */
68     const std::string &name() const;
69 
70     /** Reduction variables can be used as expressions. */
71     operator Expr() const;
72 };
73 
74 /** A multi-dimensional domain over which to iterate. Used when
75  * defining functions with update definitions.
76  *
77  * An reduction is a function with a two-part definition. It has an
78  * initial value, which looks much like a pure function, and an update
79  * definition, which may refer to some RDom. Evaluating such a
80  * function first initializes it over the required domain (which is
81  * inferred based on usage), and then runs update rule for all points
82  * in the RDom. For example:
83  *
84  \code
85  Func f;
86  Var x;
87  RDom r(0, 10);
88  f(x) = x; // the initial value
89  f(r) = f(r) * 2;
90  Buffer<int> result = f.realize(10);
91  \endcode
92  *
93  * This function creates a single-dimensional buffer of size 10, in
94  * which element x contains the value x*2. Internally, first the
95  * initialization rule fills in x at every site, and then the update
96  * definition doubles every site.
97  *
98  * One use of reductions is to build a function recursively (pure
99  * functions in halide cannot be recursive). For example, this
100  * function fills in an array with the first 20 fibonacci numbers:
101  *
102  \code
103  Func f;
104  Var x;
105  RDom r(2, 18);
106  f(x) = 1;
107  f(r) = f(r-1) + f(r-2);
108  \endcode
109  *
110  * Another use of reductions is to perform scattering operations, as
111  * unlike a pure function declaration, the left-hand-side of an update
112  * definition may contain general expressions:
113  *
114  \code
115  ImageParam input(UInt(8), 2);
116  Func histogram;
117  Var x;
118  RDom r(input); // Iterate over all pixels in the input
119  histogram(x) = 0;
120  histogram(input(r.x, r.y)) = histogram(input(r.x, r.y)) + 1;
121  \endcode
122  *
123  * An update definition may also be multi-dimensional. This example
124  * computes a summed-area table by first summing horizontally and then
125  * vertically:
126  *
127  \code
128  ImageParam input(Float(32), 2);
129  Func sum_x, sum_y;
130  Var x, y;
131  RDom r(input);
132  sum_x(x, y)     = input(x, y);
133  sum_x(r.x, r.y) = sum_x(r.x, r.y) + sum_x(r.x-1, r.y);
134  sum_y(x, y)     = sum_x(x, y);
135  sum_y(r.x, r.y) = sum_y(r.x, r.y) + sum_y(r.x, r.y-1);
136  \endcode
137  *
138  * You can also mix pure dimensions with reduction variables. In the
139  * previous example, note that there's no need for the y coordinate in
140  * sum_x to be traversed serially. The sum within each row is entirely
141  * independent. The rows could be computed in parallel, or in a
142  * different order, without changing the meaning. Therefore, we can
143  * instead write this definition as follows:
144  *
145  \code
146  ImageParam input(Float(32), 2);
147  Func sum_x, sum_y;
148  Var x, y;
149  RDom r(input);
150  sum_x(x, y)   = input(x, y);
151  sum_x(r.x, y) = sum_x(r.x, y) + sum_x(r.x-1, y);
152  sum_y(x, y)   = sum_x(x, y);
153  sum_y(x, r.y) = sum_y(x, r.y) + sum_y(x, r.y-1);
154  \endcode
155  *
156  * This lets us schedule it more flexibly. You can now parallelize the
157  * update step of sum_x over y by calling:
158  \code
159  sum_x.update().parallel(y).
160  \endcode
161  *
162  * Note that calling sum_x.parallel(y) only parallelizes the
163  * initialization step, and not the update step! Scheduling the update
164  * step of a reduction must be done using the handle returned by
165  * \ref Func::update(). This code parallelizes both the initialization
166  * step and the update step:
167  *
168  \code
169  sum_x.parallel(y);
170  sum_x.update().parallel(y);
171  \endcode
172  *
173  * When you mix reduction variables and pure dimensions, the reduction
174  * domain is traversed outermost. That is, for each point in the
175  * reduction domain, the inferred pure domain is traversed in its
176  * entirety. For the above example, this means that sum_x walks down
177  * the columns, and sum_y walks along the rows. This may not be
178  * cache-coherent. You may try reordering these dimensions using the
179  * schedule, but Halide will return an error if it decides that this
180  * risks changing the meaning of your function. The solution lies in
181  * clever scheduling. If we say:
182  *
183  \code
184  sum_x.compute_at(sum_y, y);
185  \endcode
186  *
187  * Then the sum in x is computed only as necessary for each scanline
188  * of the sum in y. This not only results in sum_x walking along the
189  * rows, it also improves the locality of the entire pipeline.
190  */
191 class RDom {
192     Internal::ReductionDomain dom;
193 
194     void init_vars(const std::string &name);
195 
196     void initialize_from_region(const Region &region, std::string name = "");
197 
198     template<typename... Args>
initialize_from_region(Region & region,const Expr & min,const Expr & extent,Args &&...args)199     HALIDE_NO_USER_CODE_INLINE void initialize_from_region(Region &region, const Expr &min, const Expr &extent, Args &&... args) {
200         region.push_back({min, extent});
201         initialize_from_region(region, std::forward<Args>(args)...);
202     }
203 
204 public:
205     /** Construct an undefined reduction domain. */
206     RDom() = default;
207 
208     /** Construct a multi-dimensional reduction domain with the given name. If the name
209      * is left blank, a unique one is auto-generated. */
210     // @{
211     HALIDE_NO_USER_CODE_INLINE RDom(const Region &region, std::string name = "") {
212         initialize_from_region(region, std::move(name));
213     }
214 
215     template<typename... Args>
RDom(Expr min,Expr extent,Args &&...args)216     HALIDE_NO_USER_CODE_INLINE RDom(Expr min, Expr extent, Args &&... args) {
217         // This should really just be a delegating constructor, but I couldn't make
218         // that work with variadic template unpacking in visual studio 2013
219         Region region;
220         initialize_from_region(region, min, extent, std::forward<Args>(args)...);
221     }
222     // @}
223 
224     /** Construct a reduction domain that iterates over all points in
225      * a given Buffer or ImageParam. Has the same dimensionality as
226      * the argument. */
227     // @{
228     RDom(const Buffer<void> &);
229     RDom(const OutputImageParam &);
230     template<typename T>
RDom(const Buffer<T> & im)231     HALIDE_NO_USER_CODE_INLINE RDom(const Buffer<T> &im)
232         : RDom(Buffer<void>(im)) {
233     }
234     // @}
235 
236     /** Construct a reduction domain that wraps an Internal ReductionDomain object. */
237     RDom(const Internal::ReductionDomain &d);
238 
239     /** Get at the internal reduction domain object that this wraps. */
domain()240     Internal::ReductionDomain domain() const {
241         return dom;
242     }
243 
244     /** Check if this reduction domain is non-null */
defined()245     bool defined() const {
246         return dom.defined();
247     }
248 
249     /** Compare two reduction domains for equality of reference */
same_as(const RDom & other)250     bool same_as(const RDom &other) const {
251         return dom.same_as(other.dom);
252     }
253 
254     /** Get the dimensionality of a reduction domain */
255     int dimensions() const;
256 
257     /** Get at one of the dimensions of the reduction domain */
258     RVar operator[](int) const;
259 
260     /** Single-dimensional reduction domains can be used as RVars directly. */
261     operator RVar() const;
262 
263     /** Single-dimensional reduction domains can be also be used as Exprs directly. */
264     operator Expr() const;
265 
266     /** Add a predicate to the RDom. An RDom may have multiple
267      * predicates associated with it. An update definition that uses
268      * an RDom only iterates over the subset points in the domain for
269      * which all of its predicates are true. The predicate expression
270      * obeys the same rules as the expressions used on the
271      * right-hand-side of the corresponding update definition. It may
272      * refer to the RDom's variables and free variables in the Func's
273      * update definition. It may include calls to other Funcs, or make
274      * recursive calls to the same Func. This permits iteration over
275      * non-rectangular domains, or domains with sizes that vary with
276      * some free variable, or domains with shapes determined by some
277      * other Func.
278      *
279      * Note that once RDom is used in the update definition of some
280      * Func, no new predicates can be added to the RDom.
281      *
282      * Consider a simple example:
283      \code
284      RDom r(0, 20, 0, 20);
285      r.where(r.x < r.y);
286      r.where(r.x == 10);
287      r.where(r.y > 13);
288      f(r.x, r.y) += 1;
289      \endcode
290      * This is equivalent to:
291      \code
292      for (int r.y = 0; r.y < 20; r.y++) {
293        if (r.y > 13) {
294          for (int r.x = 0; r.x < 20; r.x++) {
295            if (r.x == 10) {
296              if (r.x < r.y) {
297                f[r.x, r.y] += 1;
298              }
299            }
300          }
301        }
302      }
303      \endcode
304      *
305      * Where possible Halide restricts the range of the containing for
306      * loops to avoid the cases where the predicate is false so that
307      * the if statement can be removed entirely. The case above would
308      * be further simplified into:
309      *
310      \code
311      for (int r.y = 14; r.y < 20; r.y++) {
312        f[r.x, r.y] += 1;
313      }
314      \endcode
315      *
316      * In general, the predicates that we can simplify away by
317      * restricting loop ranges are inequalities that compare an inner
318      * Var or RVar to some expression in outer Vars or RVars.
319      *
320      * You can also pack multiple conditions into one predicate like so:
321      *
322      \code
323      RDom r(0, 20, 0, 20);
324      r.where((r.x < r.y) && (r.x == 10) && (r.y > 13));
325      f(r.x, r.y) += 1;
326      \endcode
327      *
328      */
329     void where(Expr predicate);
330 
331     /** Direct access to the first four dimensions of the reduction
332      * domain. Some of these variables may be undefined if the
333      * reduction domain has fewer than four dimensions. */
334     // @{
335     RVar x, y, z, w;
336     // @}
337 };
338 
339 /** Emit an RVar in a human-readable form */
340 std::ostream &operator<<(std::ostream &stream, const RVar &);
341 
342 /** Emit an RDom in a human-readable form. */
343 std::ostream &operator<<(std::ostream &stream, const RDom &);
344 }  // namespace Halide
345 
346 #endif
347