1 //===- LoopFusionUtils.h - Loop fusion utilities ----------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This header file defines prototypes for various loop fusion utility 10 // methods: these are not passes by themselves but are used either by passes, 11 // optimization sequences, or in turn by other transformation utilities. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H 16 #define MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H 17 18 #include "mlir/IR/Value.h" 19 #include "mlir/Support/LLVM.h" 20 #include "llvm/ADT/DenseMap.h" 21 #include "llvm/ADT/SmallVector.h" 22 23 namespace mlir { 24 class AffineForOp; 25 struct ComputationSliceState; 26 class Operation; 27 28 // TODO: Extend this module to include utility functions for querying fusion 29 // cost/storage reduction, and for performing the loop fusion transformation. 30 31 struct FusionResult { 32 enum ResultEnum { 33 Success, 34 FailPrecondition, // Failed precondition for fusion. (e.g. same block). 35 FailBlockDependence, // Fusion would violate another dependence in block. 36 FailFusionDependence, // Fusion would reverse dependences between loops. 37 FailComputationSlice, // Unable to compute src loop computation slice. 38 FailIncorrectSlice, // Slice is computed, but it is incorrect. 39 } value; FusionResultFusionResult40 FusionResult(ResultEnum v) : value(v) {} 41 }; 42 43 /// Describes the fusion strategy to be used in the Affine loop fusion 44 /// utilities. Currently, it is used to specialized the loop fusion utilities 45 /// with the assumptions made in the AffineLoopFusion pass for producer-consumer 46 /// and sibling fusion, while sharing a single implementation. The latter 47 /// strategies are also limited to scenarios where a single memref is involved 48 /// in the producer-consume or sibling relationship between the candidate 49 /// loops. We use 'memref' to keep track of such a memref. 50 // TODO: Remove 'memref' when we support more generic scenarios. 51 // TODO: Generalize utilities so that producer-consumer and sibling fusion 52 // strategies can be used without the assumptions made in the AffineLoopFusion 53 // pass. 54 class FusionStrategy { 55 public: 56 enum StrategyEnum { 57 // Generic loop fusion: Arbitrary loops are considered for fusion. No 58 // assumptions about a specific fusion strategy from AffineLoopFusion pass 59 // are made. 60 // TODO: Generic fusion is not fully implemented by fusion utilities yet. 61 // It should only be used for testing. 62 Generic, 63 // Producer-consumer fusion: Only loops with a producer-consumer 64 // memref dependence are considered for fusion. Currently, assumptions from 65 // the producer-consumer fusion implementation in AffineLoopFusion pass are 66 // made. See pass for specific details. 67 ProducerConsumer, 68 // Sibling fusion: Only sibling loops with no producer-consumer memref 69 // dependences are considered for fusion. Memref reuse is taken into account 70 // for profitability. Currently, assumptions from the sibling fusion 71 // implementation in AffineLoopFusion pass are made. See pass for specific 72 // details. 73 Sibling 74 }; 75 76 /// Construct a generic or producer-consumer fusion strategy. FusionStrategy(StrategyEnum strategy)77 FusionStrategy(StrategyEnum strategy) : strategy(strategy) { 78 assert(strategy != Sibling && 79 "Sibling fusion strategy requires a specific memref"); 80 } 81 82 /// Construct a sibling fusion strategy targeting 'memref'. This construct 83 /// should only be used for sibling fusion. FusionStrategy(Value memref)84 FusionStrategy(Value memref) : strategy(Sibling), memref(memref) {} 85 86 /// Returns the fusion strategy. getStrategy()87 StrategyEnum getStrategy() const { return strategy; }; 88 89 /// Returns the memref attached to this sibling fusion strategy. getSiblingFusionMemRef()90 Value getSiblingFusionMemRef() const { 91 assert(strategy == Sibling && "Memref is only valid for sibling fusion"); 92 return memref; 93 } 94 95 private: 96 /// Fusion strategy. 97 StrategyEnum strategy; 98 99 /// Target memref for this fusion transformation. Only used for sibling 100 /// fusion. 101 Value memref; 102 }; 103 104 /// Checks the feasibility of fusing the loop nest rooted at 'srcForOp' into the 105 /// loop nest rooted at 'dstForOp' at 'dstLoopDepth'. Returns FusionResult 106 /// 'Success' if fusion of the src/dst loop nests is feasible (i.e. they are 107 /// in the same block and dependences would not be violated). Otherwise 108 /// returns a FusionResult explaining why fusion is not feasible. 109 /// NOTE: This function is not feature complete and should only be used in 110 /// testing. 111 /// TODO: Update comments when this function is fully implemented. 112 FusionResult 113 canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, unsigned dstLoopDepth, 114 ComputationSliceState *srcSlice, 115 FusionStrategy fusionStrategy = FusionStrategy::Generic); 116 117 /// Fuses 'srcForOp' into 'dstForOp' with destination loop block insertion 118 /// point and source slice loop bounds specified in 'srcSlice'. 119 /// `isInnermostSiblingInsertionFusion` enables cleanup of `srcForOp that is a 120 /// single-iteration reduction loop being sibling-fused into a 'dstForOp'. 121 void fuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, 122 const ComputationSliceState &srcSlice, 123 bool isInnermostSiblingInsertionFusion = false); 124 125 /// LoopNestStats aggregates various per-loop statistics (eg. loop trip count 126 /// and operation count) for a loop nest up until (and including) the innermost 127 /// loop body. 128 struct LoopNestStats { 129 /// Map from AffineForOp to immediate child AffineForOps in its loop body. 130 DenseMap<Operation *, SmallVector<AffineForOp, 2>> loopMap; 131 /// Map from AffineForOp to count of operations in its loop body. 132 DenseMap<Operation *, uint64_t> opCountMap; 133 /// Map from AffineForOp to its constant trip count. 134 DenseMap<Operation *, uint64_t> tripCountMap; 135 }; 136 137 /// Collect loop nest statistics (eg. loop trip count and operation count) 138 /// in 'stats' for loop nest rooted at 'forOp'. Returns true on success, 139 /// returns false otherwise. 140 // TODO: Consider moving this to LoopUtils. 141 bool getLoopNestStats(AffineForOp forOp, LoopNestStats *stats); 142 143 /// Computes the total cost of the loop nest rooted at 'forOp' using 'stats'. 144 /// Currently, the total cost is computed by counting the total operation 145 /// instance count (i.e. total number of operations in the loop body * loop 146 /// trip count) for the entire loop nest. 147 // TODO: Improve this cost model. 148 int64_t getComputeCost(AffineForOp forOp, LoopNestStats &stats); 149 150 /// Computes and returns in 'computeCost', the total compute cost of fusing the 151 /// 'slice' of the loop nest rooted at 'srcForOp' into 'dstForOp'. Currently, 152 /// the total cost is computed by counting the total operation instance count 153 /// (i.e. total number of operations in the loop body * loop trip count) for 154 /// the entire loop nest. 155 /// Returns true on success, failure otherwise (e.g. non-constant trip counts). 156 // TODO: Improve this cost model. 157 bool getFusionComputeCost(AffineForOp srcForOp, LoopNestStats &srcStats, 158 AffineForOp dstForOp, LoopNestStats &dstStats, 159 const ComputationSliceState &slice, 160 int64_t *computeCost); 161 162 /// Returns in 'producerConsumerMemrefs' the memrefs involved in a 163 /// producer-consumer dependence between write ops in 'srcOps' and read ops in 164 /// 'dstOps'. 165 void gatherProducerConsumerMemrefs(ArrayRef<Operation *> srcOps, 166 ArrayRef<Operation *> dstOps, 167 DenseSet<Value> &producerConsumerMemrefs); 168 } // end namespace mlir 169 170 #endif // MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H 171