#ifndef HALIDE_SCHEDULE_H
#define HALIDE_SCHEDULE_H

/** \file
 * Defines the internal representation of the schedule for a function
 */

#include <map>
#include <string>
#include <utility>
#include <vector>

#include "DeviceAPI.h"
#include "Expr.h"
#include "FunctionPtr.h"
#include "Parameter.h"
#include "PrefetchDirective.h"

namespace Halide {

class Func;
struct VarOrRVar;

namespace Internal {
class Function;
struct FunctionContents;
struct LoopLevelContents;
}  // namespace Internal

/** Different ways to handle a tail case in a split when the
 * factor does not provably divide the extent. */
enum class TailStrategy {
    /** Round up the extent to be a multiple of the split
     * factor. Not legal for RVars, as it would change the meaning
     * of the algorithm. Pros: generates the simplest, fastest
     * code. Cons: if used on a stage that reads from the input or
     * writes to the output, constrains the input or output size
     * to be a multiple of the split factor. */
    RoundUp,

    /** Guard the inner loop with an if statement that prevents
     * evaluation beyond the original extent. Always legal. The if
     * statement is treated like a boundary condition, and
     * factored out into a loop epilogue if possible. Pros: no
     * redundant re-evaluation; does not constrain input or
     * output sizes. Cons: increases code size due to separate
     * tail-case handling; vectorization will scalarize in the tail
     * case to handle the if statement. */
    GuardWithIf,

    /** Prevent evaluation beyond the original extent by shifting
     * the tail case inwards, re-evaluating some points near the
     * end. Only legal for pure variables in pure definitions. If
     * the inner loop is very simple, the tail case is treated
     * like a boundary condition and factored out into an
     * epilogue.
     *
     * This is a good trade-off between several factors. Like
     * RoundUp, it supports vectorization well, because the inner
     * loop is always a fixed size with no data-dependent
     * branching. It increases code size slightly for inner loops
     * due to the epilogue handling, but not for outer loops
     * (e.g. loops over tiles). If used on a stage that reads from
     * an input or writes to an output, this strategy only requires
     * that the input/output extent be at least the split factor,
     * instead of a multiple of the split factor as with RoundUp. */
    ShiftInwards,

    /** For pure definitions use ShiftInwards. For pure vars in
     * update definitions use RoundUp. For RVars in update
     * definitions use GuardWithIf. */
    Auto
};
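
// A minimal usage sketch (not part of this header's declarations): the tail
// strategy is the optional final argument to scheduling calls such as
// Func::split and Func::vectorize. Here f, x, xo, and xi are assumed to be a
// Func and Vars declared elsewhere:
//
//     f.split(x, xo, xi, 8, TailStrategy::GuardWithIf);
//     f.vectorize(x, 8, TailStrategy::RoundUp);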

/** Different ways to handle the case when the start/end of the loops of stages
 * computed with (fused) are not aligned. */
enum class LoopAlignStrategy {
    /** Shift the start of the fused loops to align. */
    AlignStart,

    /** Shift the end of the fused loops to align. */
    AlignEnd,

    /** compute_with will make no attempt to align the start/end of the
     * fused loops. */
    NoAlign,

    /** By default, LoopAlignStrategy is set to NoAlign. */
    Auto
};
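
// A minimal usage sketch (assuming Funcs f and g with a shared Var x declared
// elsewhere): the alignment strategy is the optional final argument to
// Func::compute_with, e.g.
//
//     f.compute_with(g, x, LoopAlignStrategy::AlignStart);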

/** A reference to a site in a Halide statement at the top of the
 * body of a particular for loop. Evaluating a region of a Halide
 * function is done by generating a loop nest that spans its
 * dimensions. We schedule the inputs to that function by
 * recursively injecting realizations for them at particular sites
 * in this loop nest. A LoopLevel identifies such a site. The site
 * can either be a loop nest within all stages of a function
 * or it can refer to a loop nest within a particular function's
 * stage (initial definition or updates).
 *
 * Note that a LoopLevel is essentially a pointer to an underlying value;
 * all copies of a LoopLevel refer to the same site, so mutating one copy
 * (via the set() method) will effectively mutate all copies:
 \code
 Func f;
 Var x;
 LoopLevel a(f, x);
 // Both a and b refer to LoopLevel(f, x)
 LoopLevel b = a;
 // Now both a and b refer to LoopLevel::root()
 a.set(LoopLevel::root());
 \endcode
 * This is quite useful when splitting Halide code into utility libraries, as it allows
 * a library to schedule code according to a caller's specifications, even if the caller
 * hasn't fully defined its pipeline yet:
 \code
 Func demosaic(Func input,
              LoopLevel intermed_compute_at,
              LoopLevel intermed_store_at,
              LoopLevel output_compute_at) {
    Func intermed = ...;
    Func output = ...;
    intermed.compute_at(intermed_compute_at).store_at(intermed_store_at);
    output.compute_at(output_compute_at);
    return output;
 }

 void process() {
     // Note that these LoopLevels are all undefined when we pass them to demosaic()
     LoopLevel intermed_compute_at, intermed_store_at, output_compute_at;
     Func input = ...;
     Func demosaiced = demosaic(input, intermed_compute_at, intermed_store_at, output_compute_at);
     Func output = ...;

     // We need to ensure all LoopLevels have a well-defined value prior to lowering:
     intermed_compute_at.set(LoopLevel(output, y));
     intermed_store_at.set(LoopLevel(output, y));
     output_compute_at.set(LoopLevel(output, x));
 }
 \endcode
 */
class LoopLevel {
    Internal::IntrusivePtr<Internal::LoopLevelContents> contents;

    explicit LoopLevel(Internal::IntrusivePtr<Internal::LoopLevelContents> c)
        : contents(std::move(c)) {
    }
    LoopLevel(const std::string &func_name, const std::string &var_name,
              bool is_rvar, int stage_index, bool locked = false);

public:
    /** Return the index of the function stage associated with this loop level.
     * Asserts if undefined */
    int stage_index() const;

    /** Identify the loop nest corresponding to some dimension of some function */
    // @{
    LoopLevel(const Internal::Function &f, const VarOrRVar &v, int stage_index = -1);
    LoopLevel(const Func &f, const VarOrRVar &v, int stage_index = -1);
    // @}

    /** Construct an undefined LoopLevel. Calling any method on an undefined
     * LoopLevel (other than set()) will assert. */
    LoopLevel();

    /** Construct a special LoopLevel value that implies
     * that a function should be inlined away. */
    static LoopLevel inlined();

    /** Construct a special LoopLevel value which represents the
     * location outside of all for loops. */
    static LoopLevel root();
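
    // A minimal usage sketch (not a declaration): the special values can be
    // passed directly to scheduling calls that accept a LoopLevel, assuming a
    // Func f declared elsewhere, e.g.
    //     f.compute_at(LoopLevel::root());  // the same as calling f.compute_root()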

    /** Mutate our contents to match the contents of 'other'. */
    void set(const LoopLevel &other);

    // All the public methods below this point are meant only for internal
    // use by Halide, rather than user code; hence, they are deliberately
    // documented with plain comments (rather than Doxygen) to avoid being
    // present in user documentation.

    // Lock this LoopLevel.
    LoopLevel &lock();

    // Return the Func name. Asserts if the LoopLevel is_root() or is_inlined() or !defined().
    std::string func() const;

    // Return the VarOrRVar. Asserts if the LoopLevel is_root() or is_inlined() or !defined().
    VarOrRVar var() const;

    // Return true iff the LoopLevel is defined. (Only LoopLevels created
    // with the default ctor are undefined.)
    bool defined() const;

    // Test if a loop level corresponds to inlining the function.
    bool is_inlined() const;

    // Test if a loop level is 'root', which describes the site
    // outside of all for loops.
    bool is_root() const;

    // Return a string of the form func.var -- note that this is safe
    // to call for root or inline LoopLevels, but asserts if !defined().
    std::string to_string() const;

    // Compare this loop level against the variable name of a for
    // loop, to see if this loop level refers to the site
    // immediately inside this loop. Asserts if !defined().
    bool match(const std::string &loop) const;

    bool match(const LoopLevel &other) const;

    // Check if two loop levels are exactly the same.
    bool operator==(const LoopLevel &other) const;

    bool operator!=(const LoopLevel &other) const {
        return !(*this == other);
    }

private:
    void check_defined() const;
    void check_locked() const;
    void check_defined_and_locked() const;
};

struct FuseLoopLevel {
    LoopLevel level;
    /** Contains alignment strategies for the fused dimensions (indexed by the
     * dimension name). If not in the map, use the default alignment strategy
     * to align the fused dimension (see \ref LoopAlignStrategy::Auto).
     */
    std::map<std::string, LoopAlignStrategy> align;

    FuseLoopLevel()
        : level(LoopLevel::inlined().lock()) {
    }
    FuseLoopLevel(const LoopLevel &level, const std::map<std::string, LoopAlignStrategy> &align)
        : level(level), align(align) {
    }
};

namespace Internal {

class IRMutator;
struct ReductionVariable;

struct Split {
    std::string old_var, outer, inner;
    Expr factor;
    bool exact;  // Is it required that the factor divides the extent
        // of the old var. True for splits of RVars. Forces
        // tail strategy to be GuardWithIf.
    TailStrategy tail;

    enum SplitType { SplitVar = 0,
                     RenameVar,
                     FuseVars,
                     PurifyRVar };

    // If split_type is RenameVar, this is just a renaming of old_var
    // to outer, not a split. The inner var should be ignored, and
    // factor should be one. Renames are kept in the same list as
    // splits so that the ordering between them is respected.

    // If split_type is PurifyRVar, this replaces the old_var RVar
    // with the outer Var. The inner var should be ignored, and
    // factor should be one.

    // If split_type is FuseVars, this does the opposite of a split:
    // it joins outer and inner into old_var.
    SplitType split_type;

    bool is_rename() const {
        return split_type == RenameVar;
    }
    bool is_split() const {
        return split_type == SplitVar;
    }
    bool is_fuse() const {
        return split_type == FuseVars;
    }
    bool is_purify() const {
        return split_type == PurifyRVar;
    }
};
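
// As a rough guide (an assumption about how the front end records scheduling
// directives, not a guarantee made by this header): Func::split adds a
// SplitVar entry, Func::fuse adds a FuseVars entry, and Func::rename adds a
// RenameVar entry to the enclosing StageSchedule's splits() list.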

/** Each Dim below has a dim_type, which tells you what
 * transformations are legal on it. When you combine two Dims of
 * distinct DimTypes (e.g. with Stage::fuse), the combined result has
 * the greater enum value of the two types. */
enum class DimType {
    /** This dim originated from a Var. You can evaluate a Func at
     * distinct values of this Var in any order over an interval
     * that's at least as large as the interval required. In pure
     * definitions you can even redundantly re-evaluate points. */
    PureVar = 0,

    /** The dim originated from an RVar. You can evaluate a Func at
     * distinct values of this RVar in any order (including in
     * parallel) over exactly the interval specified in the
     * RDom. PureRVars can also be reordered arbitrarily in the dims
     * list, as there are no data hazards between the evaluation of
     * the Func at distinct values of the RVar.
     *
     * The most common case where an RVar is considered pure is RVars
     * that are used in a way which obeys all the syntactic
     * constraints that a Var does, e.g:
     *
     \code
     RDom r(0, 100);
     f(r.x) = f(r.x) + 5;
     \endcode
     *
     * Other cases where RVars are pure are where the sites being
     * written to by the Func evaluated at one value of the RVar
     * couldn't possibly collide with the sites being written or read
     * by the Func at a distinct value of the RVar. For example, r.x
     * is pure in the following three definitions:
     *
     \code

     // This definition writes to even coordinates and reads from the
     // same site (which no other value of r.x is writing to) and odd
     // sites (which no other value of r.x is writing to):
     f(2*r.x) = max(f(2*r.x), f(2*r.x + 7));

     // This definition writes to scanline zero and reads from the
     // same site and scanline one:
     f(r.x, 0) += f(r.x, 1);

     // This definition reads and writes over non-overlapping ranges:
     f(r.x + 100) += f(r.x);
     \endcode
     *
     * To give two counterexamples, r.x is not pure in the following
     * definitions:
     *
     \code
     // The same site is written by distinct values of the RVar
     // (write-after-write hazard):
     f(r.x / 2) += f(r.x);

     // One value of r.x reads from a site that another value of r.x
     // is writing to (read-after-write hazard):
     f(r.x) += f(r.x + 1);
     \endcode
     */
    PureRVar,

    /** The dim originated from an RVar. You must evaluate a Func at
     * distinct values of this RVar in increasing order over precisely
     * the interval specified in the RDom. ImpureRVars may not be
     * reordered with respect to other ImpureRVars.
     *
     * All RVars are impure by default. Those for which we can prove
     * no data hazards exist get promoted to PureRVar. There are two
     * instances in which ImpureRVars may be parallelized or reordered
     * even in the presence of hazards:
     *
     * 1) In the case of an update definition that has been proven to be
     * an associative and commutative reduction, reordering of
     * ImpureRVars is allowed, and parallelizing them is allowed if
     * the update has been made atomic.
     *
     * 2) ImpureRVars can also be reordered and parallelized if
     * Func::allow_race_conditions() has been set. This is the escape
     * hatch for when there are no hazards but the checks above failed
     * to prove that (RDom::where can encode arbitrary facts about
     * non-linear integer arithmetic, which is undecidable), or for
     * when you don't actually care about the non-determinism
     * introduced by data hazards (e.g. in the algorithm HOGWILD!).
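     *
     * As a minimal sketch of case 1 (a histogram-style reduction; hist, im,
     * and r are assumed to be a Func, an input Func, and an RDom declared
     * elsewhere):
     *
     \code
     hist(im(r.x)) += 1;
     hist.update().atomic().parallel(r.x);
     \endcode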
     */
    ImpureRVar,
};

/** The Dim struct represents one loop in the schedule's
 * representation of a loop nest. */
struct Dim {
    /** Name of the loop variable */
    std::string var;

    /** How are the loop values traversed (e.g. unrolled, vectorized, parallel) */
    ForType for_type;

    /** On what device does the body of the loop execute (e.g. Host, GPU, Hexagon) */
    DeviceAPI device_api;

    /** The DimType tells us what transformations are legal on this
     * loop (see the DimType enum above). */
    DimType dim_type;

    /** Can this loop be evaluated in any order (including in
     * parallel)? Equivalently, are there no data hazards between
     * evaluations of the Func at distinct values of this var? */
    bool is_pure() const {
        return (dim_type == DimType::PureVar) || (dim_type == DimType::PureRVar);
    }

    /** Did this loop originate from an RVar (in which case the bounds
     * of the loops are algorithmically meaningful)? */
    bool is_rvar() const {
        return (dim_type == DimType::PureRVar) || (dim_type == DimType::ImpureRVar);
    }

    /** Could multiple iterations of this loop happen at the same
     * time, with reads and writes interleaved in arbitrary ways
     * according to the memory model of the underlying compiler and
     * machine? */
    bool is_unordered_parallel() const {
        return Halide::Internal::is_unordered_parallel(for_type);
    }

    /** Could multiple iterations of this loop happen at the same
     * time? Vectorized and GPULanes loop types are parallel but not
     * unordered, because the loop iterations proceed together in
     * lockstep with some well-defined outcome if there are hazards. */
    bool is_parallel() const {
        return Halide::Internal::is_parallel(for_type);
    }
};

/** A bound on a loop, typically from Func::bound */
struct Bound {
    /** The loop var being bounded */
    std::string var;

    /** Declared min and extent of the loop. min may be undefined if
     * Func::bound_extent was used. */
    Expr min, extent;

    /** If defined, the number of iterations will be a multiple of
     * "modulus", and the first iteration will be at a value congruent
     * to "remainder" modulo "modulus". Set by Func::align_bounds. */
    Expr modulus, remainder;
};
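
// A minimal sketch of the user-facing calls that create Bounds (assuming a
// Func f and Var x declared elsewhere):
//
//     f.bound(x, 0, 256);        // min = 0, extent = 256
//     f.bound_extent(x, 128);    // extent = 128, min left undefined
//     f.align_bounds(x, 32);     // modulus = 32, remainder = 0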

/** Properties of one axis of the storage of a Func */
struct StorageDim {
    /** The var in the pure definition corresponding to this axis */
    std::string var;

    /** The bounds allocated (not computed) must be a multiple of
     * "alignment". Set by Func::align_storage. */
    Expr alignment;

    /** If the Func is explicitly folded along this axis (with
     * Func::fold_storage) this gives the extent of the circular
     * buffer used, and whether it is used in increasing order
     * (fold_forward = true) or decreasing order (fold_forward =
     * false). */
    Expr fold_factor;
    bool fold_forward;
};
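
// A minimal sketch of the user-facing calls that set these properties
// (assuming a Func f and Vars x, y declared elsewhere):
//
//     f.align_storage(x, 16);    // allocate x in multiples of 16
//     f.fold_storage(y, 4);      // circular buffer of extent 4 along y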

/** This represents two stages with fused loop nests from outermost to
 * a specific loop level. The loops to compute func_1(stage_1) are
 * fused with the loops to compute func_2(stage_2) from outermost to
 * loop level var_name and the computation from stage_1 of func_1
 * occurs first.
 */
struct FusedPair {
    std::string func_1;
    std::string func_2;
    size_t stage_1;
    size_t stage_2;
    std::string var_name;

    FusedPair() = default;
    FusedPair(const std::string &f1, size_t s1, const std::string &f2,
              size_t s2, const std::string &var)
        : func_1(f1), func_2(f2), stage_1(s1), stage_2(s2), var_name(var) {
    }

    bool operator==(const FusedPair &other) const {
        return (func_1 == other.func_1) && (func_2 == other.func_2) &&
               (stage_1 == other.stage_1) && (stage_2 == other.stage_2) &&
               (var_name == other.var_name);
    }
    bool operator<(const FusedPair &other) const {
        if (func_1 != other.func_1) {
            return func_1 < other.func_1;
        }
        if (func_2 != other.func_2) {
            return func_2 < other.func_2;
        }
        if (var_name != other.var_name) {
            return var_name < other.var_name;
        }
        if (stage_1 != other.stage_1) {
            return stage_1 < other.stage_1;
        }
        return stage_2 < other.stage_2;
    }
};

struct FuncScheduleContents;
struct StageScheduleContents;
struct FunctionContents;

/** A schedule for a Function of a Halide pipeline. This schedule is
 * applied to all stages of the Function. Right now this interface is
 * basically a struct, offering mutable access to its innards.
 * In the future it may become more encapsulated. */
class FuncSchedule {
    IntrusivePtr<FuncScheduleContents> contents;

public:
    FuncSchedule(IntrusivePtr<FuncScheduleContents> c)
        : contents(std::move(c)) {
    }
    FuncSchedule(const FuncSchedule &other)
        : contents(other.contents) {
    }
    FuncSchedule();

    /** Return a deep copy of this FuncSchedule. It recursively deep-copies all
     * called functions, schedules, specializations, and reduction domains. The
     * method takes a map of <old FunctionContents, deep-copied version> as
     * input and reuses the deep-copied FunctionContents from the map if one
     * exists, rather than creating a new deep copy, to avoid deep-copying the
     * same FunctionContents multiple times.
     */
    FuncSchedule deep_copy(
        std::map<FunctionPtr, FunctionPtr> &copied_map) const;

    /** This flag is set to true if the schedule is memoized. */
    // @{
    bool &memoized();
    bool memoized() const;
    // @}

    /** Is the production of this Function done asynchronously? */
    bool &async();
    bool async() const;

    /** The list and order of dimensions used to store this
     * function. The first dimension in the vector corresponds to the
     * innermost dimension for storage (i.e. which dimension is
     * tightly packed in memory) */
    // @{
    const std::vector<StorageDim> &storage_dims() const;
    std::vector<StorageDim> &storage_dims();
    // @}

    /** The memory type (heap/stack/shared/etc) used to back this Func. */
    // @{
    MemoryType memory_type() const;
    MemoryType &memory_type();
    // @}

    /** You may explicitly bound some of the dimensions of a function,
     * or constrain them to lie on multiples of a given factor. See
     * \ref Func::bound and \ref Func::align_bounds */
    // @{
    const std::vector<Bound> &bounds() const;
    std::vector<Bound> &bounds();
    // @}

    /** You may explicitly specify an estimate of some of the function
     * dimensions. See \ref Func::estimate */
    // @{
    const std::vector<Bound> &estimates() const;
    std::vector<Bound> &estimates();
    // @}

    /** Mark calls to this Function made by the function named 'f' to be
     * replaced with its identity wrapper or clone during lowering. If the
     * string 'f' is empty, calls from all other functions in the pipeline
     * (excluding this Function itself) are replaced with the global identity
     * wrapper. See \ref Func::in and \ref Func::clone_in for more details. */
    // @{
    const std::map<std::string, Internal::FunctionPtr> &wrappers() const;
    std::map<std::string, Internal::FunctionPtr> &wrappers();
    void add_wrapper(const std::string &f,
                     const Internal::FunctionPtr &wrapper);
    // @}

    /** At what sites should we inject the allocation and the
     * computation of this function? The store_level must be outside
     * of or equal to the compute_level. If the compute_level is
     * inline, the store_level is meaningless. See \ref Func::store_at
     * and \ref Func::compute_at */
    // @{
    const LoopLevel &store_level() const;
    const LoopLevel &compute_level() const;
    LoopLevel &store_level();
    LoopLevel &compute_level();
    // @}
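
    // A minimal sketch of the user-facing calls that set these levels
    // (assuming Funcs producer and consumer and Vars x, y declared elsewhere):
    //     producer.store_at(consumer, y).compute_at(consumer, x);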

    /** Pass an IRVisitor through to all Exprs referenced in the
     * Schedule. */
    void accept(IRVisitor *) const;

    /** Pass an IRMutator through to all Exprs referenced in the
     * Schedule. */
    void mutate(IRMutator *);
};

/** A schedule for a single stage of a Halide pipeline. Right now this
 * interface is basically a struct, offering mutable access to its
 * innards. In the future it may become more encapsulated. */
class StageSchedule {
    IntrusivePtr<StageScheduleContents> contents;

public:
    StageSchedule(IntrusivePtr<StageScheduleContents> c)
        : contents(std::move(c)) {
    }
    StageSchedule(const StageSchedule &other)
        : contents(other.contents) {
    }
    StageSchedule();

    /** Return a copy of this StageSchedule. */
    StageSchedule get_copy() const;

    /** This flag is set to true if the dims list has been manipulated
     * by the user (or if a ScheduleHandle was created that could have
     * been used to manipulate it). It controls the warning that
     * occurs if you schedule the vars of the pure step but not the
     * update steps. */
    // @{
    bool &touched();
    bool touched() const;
    // @}

    /** The RVars of the reduction domain associated with this schedule, if any. */
    // @{
    const std::vector<ReductionVariable> &rvars() const;
    std::vector<ReductionVariable> &rvars();
    // @}

    /** The traversal of the domain of a function can have some of its
     * dimensions split into sub-dimensions. See \ref Func::split */
    // @{
    const std::vector<Split> &splits() const;
    std::vector<Split> &splits();
    // @}

    /** The list and ordering of dimensions used to evaluate this
     * function, after all splits have taken place. The first
     * dimension in the vector corresponds to the innermost for loop,
     * and the last is the outermost. Also specifies what type of for
     * loop to use for each dimension. Does not specify the bounds on
     * each dimension. These get inferred from how the function is
     * used, what the splits are, and any optional bounds in the list below. */
    // @{
    const std::vector<Dim> &dims() const;
    std::vector<Dim> &dims();
    // @}

    /** You may perform prefetching in some of the dimensions of a
     * function. See \ref Func::prefetch */
    // @{
    const std::vector<PrefetchDirective> &prefetches() const;
    std::vector<PrefetchDirective> &prefetches();
    // @}

    /** Innermost loop level of the fused loop nest for this function stage.
     * Fusion runs from the outermost loop down to this loop level. The stages
     * being fused should not have a producer/consumer relationship. See
     * \ref Func::compute_with */
    // @{
    const FuseLoopLevel &fuse_level() const;
    FuseLoopLevel &fuse_level();
    // @}

    /** List of function stages that are to be fused with this function stage
     * from the outermost loop to a certain loop level. Those function stages
     * are to be computed AFTER this function stage at the last fused loop level.
     * This list is populated when realization_order() is called. See
     * \ref Func::compute_with */
    // @{
    const std::vector<FusedPair> &fused_pairs() const;
    std::vector<FusedPair> &fused_pairs();
    // @}

    /** Are race conditions permitted? */
    // @{
    bool allow_race_conditions() const;
    bool &allow_race_conditions();
    // @}

    /** Use atomic update? */
    // @{
    bool atomic() const;
    bool &atomic();
    // @}

    /** Atomic updates are only allowed on associative reductions.
     *  We try to prove associativity, but the user can override the
     *  associativity test and suppress the compiler error if the prover
     *  fails to recognize the associativity, or if the user does not care. */
    // @{
    bool override_atomic_associativity_test() const;
    bool &override_atomic_associativity_test();
    // @}

    /** Pass an IRVisitor through to all Exprs referenced in the
     * Schedule. */
    void accept(IRVisitor *) const;

    /** Pass an IRMutator through to all Exprs referenced in the
     * Schedule. */
    void mutate(IRMutator *);
};

}  // namespace Internal
}  // namespace Halide

#endif