1 #ifndef HALIDE_SCHEDULE_H 2 #define HALIDE_SCHEDULE_H 3 4 /** \file 5 * Defines the internal representation of the schedule for a function 6 */ 7 8 #include <map> 9 #include <string> 10 #include <utility> 11 #include <vector> 12 13 #include "DeviceAPI.h" 14 #include "Expr.h" 15 #include "FunctionPtr.h" 16 #include "Parameter.h" 17 #include "PrefetchDirective.h" 18 19 namespace Halide { 20 21 class Func; 22 struct VarOrRVar; 23 24 namespace Internal { 25 class Function; 26 struct FunctionContents; 27 struct LoopLevelContents; 28 } // namespace Internal 29 30 /** Different ways to handle a tail case in a split when the 31 * factor does not provably divide the extent. */ 32 enum class TailStrategy { 33 /** Round up the extent to be a multiple of the split 34 * factor. Not legal for RVars, as it would change the meaning 35 * of the algorithm. Pros: generates the simplest, fastest 36 * code. Cons: if used on a stage that reads from the input or 37 * writes to the output, constrains the input or output size 38 * to be a multiple of the split factor. */ 39 RoundUp, 40 41 /** Guard the inner loop with an if statement that prevents 42 * evaluation beyond the original extent. Always legal. The if 43 * statement is treated like a boundary condition, and 44 * factored out into a loop epilogue if possible. Pros: no 45 * redundant re-evaluation; does not constrain input our 46 * output sizes. Cons: increases code size due to separate 47 * tail-case handling; vectorization will scalarize in the tail 48 * case to handle the if statement. */ 49 GuardWithIf, 50 51 /** Prevent evaluation beyond the original extent by shifting 52 * the tail case inwards, re-evaluating some points near the 53 * end. Only legal for pure variables in pure definitions. If 54 * the inner loop is very simple, the tail case is treated 55 * like a boundary condition and factored out into an 56 * epilogue. 57 * 58 * This is a good trade-off between several factors. Like 59 * RoundUp, it supports vectorization well, because the inner 60 * loop is always a fixed size with no data-dependent 61 * branching. It increases code size slightly for inner loops 62 * due to the epilogue handling, but not for outer loops 63 * (e.g. loops over tiles). If used on a stage that reads from 64 * an input or writes to an output, this stategy only requires 65 * that the input/output extent be at least the split factor, 66 * instead of a multiple of the split factor as with RoundUp. */ 67 ShiftInwards, 68 69 /** For pure definitions use ShiftInwards. For pure vars in 70 * update definitions use RoundUp. For RVars in update 71 * definitions use GuardWithIf. */ 72 Auto 73 }; 74 75 /** Different ways to handle the case when the start/end of the loops of stages 76 * computed with (fused) are not aligned. */ 77 enum class LoopAlignStrategy { 78 /** Shift the start of the fused loops to align. */ 79 AlignStart, 80 81 /** Shift the end of the fused loops to align. */ 82 AlignEnd, 83 84 /** compute_with will make no attempt to align the start/end of the 85 * fused loops. */ 86 NoAlign, 87 88 /** By default, LoopAlignStrategy is set to NoAlign. */ 89 Auto 90 }; 91 92 /** A reference to a site in a Halide statement at the top of the 93 * body of a particular for loop. Evaluating a region of a halide 94 * function is done by generating a loop nest that spans its 95 * dimensions. We schedule the inputs to that function by 96 * recursively injecting realizations for them at particular sites 97 * in this loop nest. A LoopLevel identifies such a site. The site 98 * can either be a loop nest within all stages of a function 99 * or it can refer to a loop nest within a particular function's 100 * stage (initial definition or updates). 101 * 102 * Note that a LoopLevel is essentially a pointer to an underlying value; 103 * all copies of a LoopLevel refer to the same site, so mutating one copy 104 * (via the set() method) will effectively mutate all copies: 105 \code 106 Func f; 107 Var x; 108 LoopLevel a(f, x); 109 // Both a and b refer to LoopLevel(f, x) 110 LoopLevel b = a; 111 // Now both a and b refer to LoopLevel::root() 112 a.set(LoopLevel::root()); 113 \endcode 114 * This is quite useful when splitting Halide code into utility libraries, as it allows 115 * a library to schedule code according to a caller's specifications, even if the caller 116 * hasn't fully defined its pipeline yet: 117 \code 118 Func demosaic(Func input, 119 LoopLevel intermed_compute_at, 120 LoopLevel intermed_store_at, 121 LoopLevel output_compute_at) { 122 Func intermed = ...; 123 Func output = ...; 124 intermed.compute_at(intermed_compute_at).store_at(intermed_store_at); 125 output.compute_at(output_compute_at); 126 return output; 127 } 128 129 void process() { 130 // Note that these LoopLevels are all undefined when we pass them to demosaic() 131 LoopLevel intermed_compute_at, intermed_store_at, output_compute_at; 132 Func input = ...; 133 Func demosaiced = demosaic(input, intermed_compute_at, intermed_store_at, output_compute_at); 134 Func output = ...; 135 136 // We need to ensure all LoopLevels have a well-defined value prior to lowering: 137 intermed_compute_at.set(LoopLevel(output, y)); 138 intermed_store_at.set(LoopLevel(output, y)); 139 output_compute_at.set(LoopLevel(output, x)); 140 } 141 \endcode 142 */ 143 class LoopLevel { 144 Internal::IntrusivePtr<Internal::LoopLevelContents> contents; 145 LoopLevel(Internal::IntrusivePtr<Internal::LoopLevelContents> c)146 explicit LoopLevel(Internal::IntrusivePtr<Internal::LoopLevelContents> c) 147 : contents(std::move(c)) { 148 } 149 LoopLevel(const std::string &func_name, const std::string &var_name, 150 bool is_rvar, int stage_index, bool locked = false); 151 152 public: 153 /** Return the index of the function stage associated with this loop level. 154 * Asserts if undefined */ 155 int stage_index() const; 156 157 /** Identify the loop nest corresponding to some dimension of some function */ 158 // @{ 159 LoopLevel(const Internal::Function &f, const VarOrRVar &v, int stage_index = -1); 160 LoopLevel(const Func &f, const VarOrRVar &v, int stage_index = -1); 161 // @} 162 163 /** Construct an undefined LoopLevel. Calling any method on an undefined 164 * LoopLevel (other than set()) will assert. */ 165 LoopLevel(); 166 167 /** Construct a special LoopLevel value that implies 168 * that a function should be inlined away. */ 169 static LoopLevel inlined(); 170 171 /** Construct a special LoopLevel value which represents the 172 * location outside of all for loops. */ 173 static LoopLevel root(); 174 175 /** Mutate our contents to match the contents of 'other'. */ 176 void set(const LoopLevel &other); 177 178 // All the public methods below this point are meant only for internal 179 // use by Halide, rather than user code; hence, they are deliberately 180 // documented with plain comments (rather than Doxygen) to avoid being 181 // present in user documentation. 182 183 // Lock this LoopLevel. 184 LoopLevel &lock(); 185 186 // Return the Func name. Asserts if the LoopLevel is_root() or is_inlined() or !defined(). 187 std::string func() const; 188 189 // Return the VarOrRVar. Asserts if the LoopLevel is_root() or is_inlined() or !defined(). 190 VarOrRVar var() const; 191 192 // Return true iff the LoopLevel is defined. (Only LoopLevels created 193 // with the default ctor are undefined.) 194 bool defined() const; 195 196 // Test if a loop level corresponds to inlining the function. 197 bool is_inlined() const; 198 199 // Test if a loop level is 'root', which describes the site 200 // outside of all for loops. 201 bool is_root() const; 202 203 // Return a string of the form func.var -- note that this is safe 204 // to call for root or inline LoopLevels, but asserts if !defined(). 205 std::string to_string() const; 206 207 // Compare this loop level against the variable name of a for 208 // loop, to see if this loop level refers to the site 209 // immediately inside this loop. Asserts if !defined(). 210 bool match(const std::string &loop) const; 211 212 bool match(const LoopLevel &other) const; 213 214 // Check if two loop levels are exactly the same. 215 bool operator==(const LoopLevel &other) const; 216 217 bool operator!=(const LoopLevel &other) const { 218 return !(*this == other); 219 } 220 221 private: 222 void check_defined() const; 223 void check_locked() const; 224 void check_defined_and_locked() const; 225 }; 226 227 struct FuseLoopLevel { 228 LoopLevel level; 229 /** Contains alignment strategies for the fused dimensions (indexed by the 230 * dimension name). If not in the map, use the default alignment strategy 231 * to align the fused dimension (see \ref LoopAlignStrategy::Auto). 232 */ 233 std::map<std::string, LoopAlignStrategy> align; 234 FuseLoopLevelFuseLoopLevel235 FuseLoopLevel() 236 : level(LoopLevel::inlined().lock()) { 237 } FuseLoopLevelFuseLoopLevel238 FuseLoopLevel(const LoopLevel &level, const std::map<std::string, LoopAlignStrategy> &align) 239 : level(level), align(align) { 240 } 241 }; 242 243 namespace Internal { 244 245 class IRMutator; 246 struct ReductionVariable; 247 248 struct Split { 249 std::string old_var, outer, inner; 250 Expr factor; 251 bool exact; // Is it required that the factor divides the extent 252 // of the old var. True for splits of RVars. Forces 253 // tail strategy to be GuardWithIf. 254 TailStrategy tail; 255 256 enum SplitType { SplitVar = 0, 257 RenameVar, 258 FuseVars, 259 PurifyRVar }; 260 261 // If split_type is Rename, then this is just a renaming of the 262 // old_var to the outer and not a split. The inner var should 263 // be ignored, and factor should be one. Renames are kept in 264 // the same list as splits so that ordering between them is 265 // respected. 266 267 // If split type is Purify, this replaces the old_var RVar to 268 // the outer Var. The inner var should be ignored, and factor 269 // should be one. 270 271 // If split_type is Fuse, then this does the opposite of a 272 // split, it joins the outer and inner into the old_var. 273 SplitType split_type; 274 is_renameSplit275 bool is_rename() const { 276 return split_type == RenameVar; 277 } is_splitSplit278 bool is_split() const { 279 return split_type == SplitVar; 280 } is_fuseSplit281 bool is_fuse() const { 282 return split_type == FuseVars; 283 } is_purifySplit284 bool is_purify() const { 285 return split_type == PurifyRVar; 286 } 287 }; 288 289 /** Each Dim below has a dim_type, which tells you what 290 * transformations are legal on it. When you combine two Dims of 291 * distinct DimTypes (e.g. with Stage::fuse), the combined result has 292 * the greater enum value of the two types. */ 293 enum class DimType { 294 /** This dim originated from a Var. You can evaluate a Func at 295 * distinct values of this Var in any order over an interval 296 * that's at least as large as the interval required. In pure 297 * definitions you can even redundantly re-evaluate points. */ 298 PureVar = 0, 299 300 /** The dim originated from an RVar. You can evaluate a Func at 301 * distinct values of this RVar in any order (including in 302 * parallel) over exactly the interval specified in the 303 * RDom. PureRVars can also be reordered arbitrarily in the dims 304 * list, as there are no data hazards between the evaluation of 305 * the Func at distinct values of the RVar. 306 * 307 * The most common case where an RVar is considered pure is RVars 308 * that are used in a way which obeys all the syntactic 309 * constraints that a Var does, e.g: 310 * 311 \code 312 RDom r(0, 100); 313 f(r.x) = f(r.x) + 5; 314 \endcode 315 * 316 * Other cases where RVars are pure are where the sites being 317 * written to by the Func evaluated at one value of the RVar 318 * couldn't possibly collide with the sites being written or read 319 * by the Func at a distinct value of the RVar. For example, r.x 320 * is pure in the following three definitions: 321 * 322 \code 323 324 // This definition writes to even coordinates and reads from the 325 // same site (which no other value of r.x is writing to) and odd 326 // sites (which no other value of r.x is writing to): 327 f(2*r.x) = max(f(2*r.x), f(2*r.x + 7)); 328 329 // This definition writes to scanline zero and reads from the the 330 // same site and scanline one: 331 f(r.x, 0) += f(r.x, 1); 332 333 // This definition reads and writes over non-overlapping ranges: 334 f(r.x + 100) += f(r.x); 335 \endcode 336 * 337 * To give two counterexamples, r.x is not pure in the following 338 * definitions: 339 * 340 \code 341 // The same site is written by distinct values of the RVar 342 // (write-after-write hazard): 343 f(r.x / 2) += f(r.x); 344 345 // One value of r.x reads from a site that another value of r.x 346 // is writing to (read-after-write hazard): 347 f(r.x) += f(r.x + 1); 348 \endcode 349 */ 350 PureRVar, 351 352 /** The dim originated from an RVar. You must evaluate a Func at 353 * distinct values of this RVar in increasing order over precisely 354 * the interval specified in the RDom. ImpureRVars may not be 355 * reordered with respect to other ImpureRVars. 356 * 357 * All RVars are impure by default. Those for which we can prove 358 * no data hazards exist get promoted to PureRVar. There are two 359 * instances in which ImpureRVars may be parallelized or reordered 360 * even in the presence of hazards: 361 * 362 * 1) In the case of an update definition that has been proven to be 363 * an associative and commutative reduction, reordering of 364 * ImpureRVars is allowed, and parallelizing them is allowed if 365 * the update has been made atomic. 366 * 367 * 2) ImpureRVars can also be reordered and parallelized if 368 * Func::allow_race_conditions() has been set. This is the escape 369 * hatch for when there are no hazards but the checks above failed 370 * to prove that (RDom::where can encode arbitrary facts about 371 * non-linear integer arithmetic, which is undecidable), or for 372 * when you don't actually care about the non-determinism 373 * introduced by data hazards (e.g. in the algorithm HOGWILD!). 374 */ 375 ImpureRVar, 376 }; 377 378 /** The Dim struct represents one loop in the schedule's 379 * representation of a loop nest. */ 380 struct Dim { 381 /** Name of the loop variable */ 382 std::string var; 383 384 /** How are the loop values traversed (e.g. unrolled, vectorized, parallel) */ 385 ForType for_type; 386 387 /** On what device does the body of the loop execute (e.g. Host, GPU, Hexagon) */ 388 DeviceAPI device_api; 389 390 /** The DimType tells us what transformations are legal on this 391 * loop (see the DimType enum above). */ 392 DimType dim_type; 393 394 /** Can this loop be evaluated in any order (including in 395 * parallel)? Equivalently, are there no data hazards between 396 * evaluations of the Func at distinct values of this var? */ is_pureDim397 bool is_pure() const { 398 return (dim_type == DimType::PureVar) || (dim_type == DimType::PureRVar); 399 } 400 401 /** Did this loop originate from an RVar (in which case the bounds 402 * of the loops are algorithmically meaningful)? */ is_rvarDim403 bool is_rvar() const { 404 return (dim_type == DimType::PureRVar) || (dim_type == DimType::ImpureRVar); 405 } 406 407 /** Could multiple iterations of this loop happen at the same 408 * time, with reads and writes interleaved in arbitrary ways 409 * according to the memory model of the underlying compiler and 410 * machine? */ is_unordered_parallelDim411 bool is_unordered_parallel() const { 412 return Halide::Internal::is_unordered_parallel(for_type); 413 } 414 415 /** Could multiple iterations of this loop happen at the same 416 * time? Vectorized and GPULanes loop types are parallel but not 417 * unordered, because the loop iterations proceed together in 418 * lockstep with some well-defined outcome if there are hazards. */ is_parallelDim419 bool is_parallel() const { 420 return Halide::Internal::is_parallel(for_type); 421 } 422 }; 423 424 /** A bound on a loop, typically from Func::bound */ 425 struct Bound { 426 /** The loop var being bounded */ 427 std::string var; 428 429 /** Declared min and extent of the loop. min may be undefined if 430 * Func::bound_extent was used. */ 431 Expr min, extent; 432 433 /** If defined, the number of iterations will be a multiple of 434 * "modulus", and the first iteration will be at a value congruent 435 * to "remainder" modulo "modulus". Set by Func::align_bounds. */ 436 Expr modulus, remainder; 437 }; 438 439 /** Properties of one axis of the storage of a Func */ 440 struct StorageDim { 441 /** The var in the pure definition corresponding to this axis */ 442 std::string var; 443 444 /** The bounds allocated (not computed) must be a multiple of 445 * "alignment". Set by Func::align_storage. */ 446 Expr alignment; 447 448 /** If the Func is explicitly folded along this axis (with 449 * Func::fold_storage) this gives the extent of the circular 450 * buffer used, and whether it is used in increasing order 451 * (fold_forward = true) or decreasing order (fold_forward = 452 * false). */ 453 Expr fold_factor; 454 bool fold_forward; 455 }; 456 457 /** This represents two stages with fused loop nests from outermost to 458 * a specific loop level. The loops to compute func_1(stage_1) are 459 * fused with the loops to compute func_2(stage_2) from outermost to 460 * loop level var_name and the computation from stage_1 of func_1 461 * occurs first. 462 */ 463 struct FusedPair { 464 std::string func_1; 465 std::string func_2; 466 size_t stage_1; 467 size_t stage_2; 468 std::string var_name; 469 470 FusedPair() = default; FusedPairFusedPair471 FusedPair(const std::string &f1, size_t s1, const std::string &f2, 472 size_t s2, const std::string &var) 473 : func_1(f1), func_2(f2), stage_1(s1), stage_2(s2), var_name(var) { 474 } 475 476 bool operator==(const FusedPair &other) const { 477 return (func_1 == other.func_1) && (func_2 == other.func_2) && 478 (stage_1 == other.stage_1) && (stage_2 == other.stage_2) && 479 (var_name == other.var_name); 480 } 481 bool operator<(const FusedPair &other) const { 482 if (func_1 != other.func_1) { 483 return func_1 < other.func_1; 484 } 485 if (func_2 != other.func_2) { 486 return func_2 < other.func_2; 487 } 488 if (var_name != other.var_name) { 489 return var_name < other.var_name; 490 } 491 if (stage_1 != other.stage_1) { 492 return stage_1 < other.stage_1; 493 } 494 return stage_2 < other.stage_2; 495 } 496 }; 497 498 struct FuncScheduleContents; 499 struct StageScheduleContents; 500 struct FunctionContents; 501 502 /** A schedule for a Function of a Halide pipeline. This schedule is 503 * applied to all stages of the Function. Right now this interface is 504 * basically a struct, offering mutable access to its innards. 505 * In the future it may become more encapsulated. */ 506 class FuncSchedule { 507 IntrusivePtr<FuncScheduleContents> contents; 508 509 public: FuncSchedule(IntrusivePtr<FuncScheduleContents> c)510 FuncSchedule(IntrusivePtr<FuncScheduleContents> c) 511 : contents(std::move(c)) { 512 } FuncSchedule(const FuncSchedule & other)513 FuncSchedule(const FuncSchedule &other) 514 : contents(other.contents) { 515 } 516 FuncSchedule(); 517 518 /** Return a deep copy of this FuncSchedule. It recursively deep copies all 519 * called functions, schedules, specializations, and reduction domains. This 520 * method takes a map of <old FunctionContents, deep-copied version> as input 521 * and would use the deep-copied FunctionContents from the map if exists 522 * instead of creating a new deep-copy to avoid creating deep-copies of the 523 * same FunctionContents multiple times. 524 */ 525 FuncSchedule deep_copy( 526 std::map<FunctionPtr, FunctionPtr> &copied_map) const; 527 528 /** This flag is set to true if the schedule is memoized. */ 529 // @{ 530 bool &memoized(); 531 bool memoized() const; 532 // @} 533 534 /** Is the production of this Function done asynchronously */ 535 bool &async(); 536 bool async() const; 537 538 /** The list and order of dimensions used to store this 539 * function. The first dimension in the vector corresponds to the 540 * innermost dimension for storage (i.e. which dimension is 541 * tightly packed in memory) */ 542 // @{ 543 const std::vector<StorageDim> &storage_dims() const; 544 std::vector<StorageDim> &storage_dims(); 545 // @} 546 547 /** The memory type (heap/stack/shared/etc) used to back this Func. */ 548 // @{ 549 MemoryType memory_type() const; 550 MemoryType &memory_type(); 551 // @} 552 553 /** You may explicitly bound some of the dimensions of a function, 554 * or constrain them to lie on multiples of a given factor. See 555 * \ref Func::bound and \ref Func::align_bounds */ 556 // @{ 557 const std::vector<Bound> &bounds() const; 558 std::vector<Bound> &bounds(); 559 // @} 560 561 /** You may explicitly specify an estimate of some of the function 562 * dimensions. See \ref Func::estimate */ 563 // @{ 564 const std::vector<Bound> &estimates() const; 565 std::vector<Bound> &estimates(); 566 // @} 567 568 /** Mark calls of a function by 'f' to be replaced with its identity 569 * wrapper or clone during the lowering stage. If the string 'f' is empty, 570 * it means replace all calls to the function by all other functions 571 * (excluding itself) in the pipeline with the global identity wrapper. 572 * See \ref Func::in and \ref Func::clone_in for more details. */ 573 // @{ 574 const std::map<std::string, Internal::FunctionPtr> &wrappers() const; 575 std::map<std::string, Internal::FunctionPtr> &wrappers(); 576 void add_wrapper(const std::string &f, 577 const Internal::FunctionPtr &wrapper); 578 // @} 579 580 /** At what sites should we inject the allocation and the 581 * computation of this function? The store_level must be outside 582 * of or equal to the compute_level. If the compute_level is 583 * inline, the store_level is meaningless. See \ref Func::store_at 584 * and \ref Func::compute_at */ 585 // @{ 586 const LoopLevel &store_level() const; 587 const LoopLevel &compute_level() const; 588 LoopLevel &store_level(); 589 LoopLevel &compute_level(); 590 // @} 591 592 /** Pass an IRVisitor through to all Exprs referenced in the 593 * Schedule. */ 594 void accept(IRVisitor *) const; 595 596 /** Pass an IRMutator through to all Exprs referenced in the 597 * Schedule. */ 598 void mutate(IRMutator *); 599 }; 600 601 /** A schedule for a single stage of a Halide pipeline. Right now this 602 * interface is basically a struct, offering mutable access to its 603 * innards. In the future it may become more encapsulated. */ 604 class StageSchedule { 605 IntrusivePtr<StageScheduleContents> contents; 606 607 public: StageSchedule(IntrusivePtr<StageScheduleContents> c)608 StageSchedule(IntrusivePtr<StageScheduleContents> c) 609 : contents(std::move(c)) { 610 } StageSchedule(const StageSchedule & other)611 StageSchedule(const StageSchedule &other) 612 : contents(other.contents) { 613 } 614 StageSchedule(); 615 616 /** Return a copy of this StageSchedule. */ 617 StageSchedule get_copy() const; 618 619 /** This flag is set to true if the dims list has been manipulated 620 * by the user (or if a ScheduleHandle was created that could have 621 * been used to manipulate it). It controls the warning that 622 * occurs if you schedule the vars of the pure step but not the 623 * update steps. */ 624 // @{ 625 bool &touched(); 626 bool touched() const; 627 // @} 628 629 /** RVars of reduction domain associated with this schedule if there is any. */ 630 // @{ 631 const std::vector<ReductionVariable> &rvars() const; 632 std::vector<ReductionVariable> &rvars(); 633 // @} 634 635 /** The traversal of the domain of a function can have some of its 636 * dimensions split into sub-dimensions. See \ref Func::split */ 637 // @{ 638 const std::vector<Split> &splits() const; 639 std::vector<Split> &splits(); 640 // @} 641 642 /** The list and ordering of dimensions used to evaluate this 643 * function, after all splits have taken place. The first 644 * dimension in the vector corresponds to the innermost for loop, 645 * and the last is the outermost. Also specifies what type of for 646 * loop to use for each dimension. Does not specify the bounds on 647 * each dimension. These get inferred from how the function is 648 * used, what the splits are, and any optional bounds in the list below. */ 649 // @{ 650 const std::vector<Dim> &dims() const; 651 std::vector<Dim> &dims(); 652 // @} 653 654 /** You may perform prefetching in some of the dimensions of a 655 * function. See \ref Func::prefetch */ 656 // @{ 657 const std::vector<PrefetchDirective> &prefetches() const; 658 std::vector<PrefetchDirective> &prefetches(); 659 // @} 660 661 /** Innermost loop level of fused loop nest for this function stage. 662 * Fusion runs from outermost to this loop level. The stages being fused 663 * should not have producer/consumer relationship. See \ref Func::compute_with 664 * and \ref Func::compute_with */ 665 // @{ 666 const FuseLoopLevel &fuse_level() const; 667 FuseLoopLevel &fuse_level(); 668 // @} 669 670 /** List of function stages that are to be fused with this function stage 671 * from the outermost loop to a certain loop level. Those function stages 672 * are to be computed AFTER this function stage at the last fused loop level. 673 * This list is populated when realization_order() is called. See 674 * \ref Func::compute_with */ 675 // @{ 676 const std::vector<FusedPair> &fused_pairs() const; 677 std::vector<FusedPair> &fused_pairs(); 678 679 /** Are race conditions permitted? */ 680 // @{ 681 bool allow_race_conditions() const; 682 bool &allow_race_conditions(); 683 // @} 684 685 /** Use atomic update? */ 686 // @{ 687 bool atomic() const; 688 bool &atomic(); 689 // @} 690 691 /** Atomic updates are only allowed on associative reductions. 692 * We try to prove the associativity, but the user can override 693 * the associativity test and suppress compiler error if the prover 694 * fails to recognize the associativity or the user does not care. */ 695 // @{ 696 bool override_atomic_associativity_test() const; 697 bool &override_atomic_associativity_test(); 698 // @} 699 700 /** Pass an IRVisitor through to all Exprs referenced in the 701 * Schedule. */ 702 void accept(IRVisitor *) const; 703 704 /** Pass an IRMutator through to all Exprs referenced in the 705 * Schedule. */ 706 void mutate(IRMutator *); 707 }; 708 709 } // namespace Internal 710 } // namespace Halide 711 712 #endif 713