1 //===- LoopFusionUtils.h - Loop fusion utilities ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header file defines prototypes for various loop fusion utility
10 // methods: these are not passes by themselves but are used either by passes,
11 // optimization sequences, or in turn by other transformation utilities.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H
16 #define MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H
17 
18 #include "mlir/IR/Value.h"
19 #include "mlir/Support/LLVM.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/SmallVector.h"
22 
23 namespace mlir {
24 class AffineForOp;
25 struct ComputationSliceState;
26 class Operation;
27 
28 // TODO: Extend this module to include utility functions for querying fusion
29 // cost/storage reduction, and for performing the loop fusion transformation.
30 
31 struct FusionResult {
32   enum ResultEnum {
33     Success,
34     FailPrecondition,     // Failed precondition for fusion. (e.g. same block).
35     FailBlockDependence,  // Fusion would violate another dependence in block.
36     FailFusionDependence, // Fusion would reverse dependences between loops.
37     FailComputationSlice, // Unable to compute src loop computation slice.
38     FailIncorrectSlice,   // Slice is computed, but it is incorrect.
39   } value;
FusionResultFusionResult40   FusionResult(ResultEnum v) : value(v) {}
41 };
42 
43 /// Describes the fusion strategy to be used in the Affine loop fusion
44 /// utilities. Currently, it is used to specialized the loop fusion utilities
45 /// with the assumptions made in the AffineLoopFusion pass for producer-consumer
46 /// and sibling fusion, while sharing a single implementation. The latter
47 /// strategies are also limited to scenarios where a single memref is involved
48 /// in the producer-consume or sibling relationship between the candidate
49 /// loops. We use 'memref' to keep track of such a memref.
50 // TODO: Remove 'memref' when we support more generic scenarios.
51 // TODO: Generalize utilities so that producer-consumer and sibling fusion
52 // strategies can be used without the assumptions made in the AffineLoopFusion
53 // pass.
54 class FusionStrategy {
55 public:
56   enum StrategyEnum {
57     // Generic loop fusion: Arbitrary loops are considered for fusion. No
58     // assumptions about a specific fusion strategy from AffineLoopFusion pass
59     // are made.
60     // TODO: Generic fusion is not fully implemented by fusion utilities yet.
61     // It should only be used for testing.
62     Generic,
63     // Producer-consumer fusion: Only loops with a producer-consumer
64     // memref dependence are considered for fusion. Currently, assumptions from
65     // the producer-consumer fusion implementation in AffineLoopFusion pass are
66     // made. See pass for specific details.
67     ProducerConsumer,
68     // Sibling fusion: Only sibling loops with no producer-consumer memref
69     // dependences are considered for fusion. Memref reuse is taken into account
70     // for profitability. Currently, assumptions from the sibling fusion
71     // implementation in AffineLoopFusion pass are made. See pass for specific
72     // details.
73     Sibling
74   };
75 
76   /// Construct a generic or producer-consumer fusion strategy.
FusionStrategy(StrategyEnum strategy)77   FusionStrategy(StrategyEnum strategy) : strategy(strategy) {
78     assert(strategy != Sibling &&
79            "Sibling fusion strategy requires a specific memref");
80   }
81 
82   /// Construct a sibling fusion strategy targeting 'memref'. This construct
83   /// should only be used for sibling fusion.
FusionStrategy(Value memref)84   FusionStrategy(Value memref) : strategy(Sibling), memref(memref) {}
85 
86   /// Returns the fusion strategy.
getStrategy()87   StrategyEnum getStrategy() const { return strategy; };
88 
89   /// Returns the memref attached to this sibling fusion strategy.
getSiblingFusionMemRef()90   Value getSiblingFusionMemRef() const {
91     assert(strategy == Sibling && "Memref is only valid for sibling fusion");
92     return memref;
93   }
94 
95 private:
96   /// Fusion strategy.
97   StrategyEnum strategy;
98 
99   /// Target memref for this fusion transformation. Only used for sibling
100   /// fusion.
101   Value memref;
102 };
103 
104 /// Checks the feasibility of fusing the loop nest rooted at 'srcForOp' into the
105 /// loop nest rooted at 'dstForOp' at 'dstLoopDepth'. Returns FusionResult
106 /// 'Success' if fusion of the src/dst loop nests is feasible (i.e. they are
107 /// in the same block and dependences would not be violated). Otherwise
108 /// returns a FusionResult explaining why fusion is not feasible.
109 /// NOTE: This function is not feature complete and should only be used in
110 /// testing.
111 /// TODO: Update comments when this function is fully implemented.
112 FusionResult
113 canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, unsigned dstLoopDepth,
114              ComputationSliceState *srcSlice,
115              FusionStrategy fusionStrategy = FusionStrategy::Generic);
116 
117 /// Fuses 'srcForOp' into 'dstForOp' with destination loop block insertion
118 /// point and source slice loop bounds specified in 'srcSlice'.
119 /// `isInnermostSiblingInsertionFusion` enables cleanup of `srcForOp that is a
120 /// single-iteration reduction loop being sibling-fused into a 'dstForOp'.
121 void fuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
122                const ComputationSliceState &srcSlice,
123                bool isInnermostSiblingInsertionFusion = false);
124 
125 /// LoopNestStats aggregates various per-loop statistics (eg. loop trip count
126 /// and operation count) for a loop nest up until (and including) the innermost
127 /// loop body.
128 struct LoopNestStats {
129   /// Map from AffineForOp to immediate child AffineForOps in its loop body.
130   DenseMap<Operation *, SmallVector<AffineForOp, 2>> loopMap;
131   /// Map from AffineForOp to count of operations in its loop body.
132   DenseMap<Operation *, uint64_t> opCountMap;
133   /// Map from AffineForOp to its constant trip count.
134   DenseMap<Operation *, uint64_t> tripCountMap;
135 };
136 
137 /// Collect loop nest statistics (eg. loop trip count and operation count)
138 /// in 'stats' for loop nest rooted at 'forOp'. Returns true on success,
139 /// returns false otherwise.
140 // TODO: Consider moving this to LoopUtils.
141 bool getLoopNestStats(AffineForOp forOp, LoopNestStats *stats);
142 
143 /// Computes the total cost of the loop nest rooted at 'forOp' using 'stats'.
144 /// Currently, the total cost is computed by counting the total operation
145 /// instance count (i.e. total number of operations in the loop body * loop
146 /// trip count) for the entire loop nest.
147 // TODO: Improve this cost model.
148 int64_t getComputeCost(AffineForOp forOp, LoopNestStats &stats);
149 
150 /// Computes and returns in 'computeCost', the total compute cost of fusing the
151 /// 'slice' of the loop nest rooted at 'srcForOp' into 'dstForOp'. Currently,
152 /// the total cost is computed by counting the total operation instance count
153 /// (i.e. total number of operations in the loop body * loop trip count) for
154 /// the entire loop nest.
155 /// Returns true on success, failure otherwise (e.g. non-constant trip counts).
156 // TODO: Improve this cost model.
157 bool getFusionComputeCost(AffineForOp srcForOp, LoopNestStats &srcStats,
158                           AffineForOp dstForOp, LoopNestStats &dstStats,
159                           const ComputationSliceState &slice,
160                           int64_t *computeCost);
161 
162 /// Returns in 'producerConsumerMemrefs' the memrefs involved in a
163 /// producer-consumer dependence between write ops in 'srcOps' and read ops in
164 /// 'dstOps'.
165 void gatherProducerConsumerMemrefs(ArrayRef<Operation *> srcOps,
166                                    ArrayRef<Operation *> dstOps,
167                                    DenseSet<Value> &producerConsumerMemrefs);
168 } // end namespace mlir
169 
170 #endif // MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H
171