1 //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements logic for three optimization passes. The first two
10 // passes try to move alloc nodes out of blocks to reduce the number of
11 // allocations and copies during buffer deallocation. The third pass tries to
12 // convert heap-based allocations to stack-based allocations, if possible.
13 
14 #include "PassDetail.h"
15 #include "mlir/IR/Operation.h"
16 #include "mlir/Interfaces/LoopLikeInterface.h"
17 #include "mlir/Pass/Pass.h"
18 #include "mlir/Transforms/Bufferize.h"
19 #include "mlir/Transforms/Passes.h"
20 
21 using namespace mlir;
22 
23 /// Returns true if the given operation implements a known high-level region-
24 /// based control-flow interface.
isKnownControlFlowInterface(Operation * op)25 static bool isKnownControlFlowInterface(Operation *op) {
26   return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
27 }
28 
29 /// Check if the size of the allocation is less than the given size. The
30 /// transformation is only applied to small buffers since large buffers could
31 /// exceed the stack space.
isSmallAlloc(Value alloc,unsigned maximumSizeInBytes)32 static bool isSmallAlloc(Value alloc, unsigned maximumSizeInBytes) {
33   auto type = alloc.getType().dyn_cast<ShapedType>();
34   if (!type || !type.hasStaticShape())
35     return false;
36   return type.getSizeInBits() < maximumSizeInBytes * 8;
37 }
38 
39 /// Checks whether the given aliases leave the allocation scope.
40 static bool
leavesAllocationScope(Region * parentRegion,const BufferAliasAnalysis::ValueSetT & aliases)41 leavesAllocationScope(Region *parentRegion,
42                       const BufferAliasAnalysis::ValueSetT &aliases) {
43   for (Value alias : aliases) {
44     for (auto *use : alias.getUsers()) {
45       // If there is at least one alias that leaves the parent region, we know
46       // that this alias escapes the whole region and hence the associated
47       // allocation leaves allocation scope.
48       if (use->hasTrait<OpTrait::ReturnLike>() &&
49           use->getParentRegion() == parentRegion)
50         return true;
51     }
52   }
53   return false;
54 }
55 
56 /// Checks, if an automated allocation scope for a given alloc value exists.
hasAllocationScope(Value alloc,const BufferAliasAnalysis & aliasAnalysis)57 static bool hasAllocationScope(Value alloc,
58                                const BufferAliasAnalysis &aliasAnalysis) {
59   Region *region = alloc.getParentRegion();
60   do {
61     if (Operation *parentOp = region->getParentOp()) {
62       // Check if the operation is an automatic allocation scope and whether an
63       // alias leaves the scope. This means, an allocation yields out of
64       // this scope and can not be transformed in a stack-based allocation.
65       if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
66           !leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
67         return true;
68       // Check if the operation is a known control flow interface and break the
69       // loop to avoid transformation in loops. Furthermore skip transformation
70       // if the operation does not implement a RegionBeanchOpInterface.
71       if (BufferPlacementTransformationBase::isLoop(parentOp) ||
72           !isKnownControlFlowInterface(parentOp))
73         break;
74     }
75   } while ((region = region->getParentRegion()));
76   return false;
77 }
78 
79 namespace {
80 
81 //===----------------------------------------------------------------------===//
82 // BufferAllocationHoisting
83 //===----------------------------------------------------------------------===//
84 
85 /// A base implementation compatible with the `BufferAllocationHoisting` class.
86 struct BufferAllocationHoistingStateBase {
87   /// A pointer to the current dominance info.
88   DominanceInfo *dominators;
89 
90   /// The current allocation value.
91   Value allocValue;
92 
93   /// The current placement block (if any).
94   Block *placementBlock;
95 
96   /// Initializes the state base.
BufferAllocationHoistingStateBase__anon0902a0d40111::BufferAllocationHoistingStateBase97   BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue,
98                                     Block *placementBlock)
99       : dominators(dominators), allocValue(allocValue),
100         placementBlock(placementBlock) {}
101 };
102 
103 /// Implements the actual hoisting logic for allocation nodes.
104 template <typename StateT>
105 class BufferAllocationHoisting : public BufferPlacementTransformationBase {
106 public:
BufferAllocationHoisting(Operation * op)107   BufferAllocationHoisting(Operation *op)
108       : BufferPlacementTransformationBase(op), dominators(op),
109         postDominators(op) {}
110 
111   /// Moves allocations upwards.
hoist()112   void hoist() {
113     for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
114       Value allocValue = std::get<0>(entry);
115       Operation *definingOp = allocValue.getDefiningOp();
116       assert(definingOp && "No defining op");
117       auto operands = definingOp->getOperands();
118       auto resultAliases = aliases.resolve(allocValue);
119       // Determine the common dominator block of all aliases.
120       Block *dominatorBlock =
121           findCommonDominator(allocValue, resultAliases, dominators);
122       // Init the initial hoisting state.
123       StateT state(&dominators, allocValue, allocValue.getParentBlock());
124       // Check for additional allocation dependencies to compute an upper bound
125       // for hoisting.
126       Block *dependencyBlock = nullptr;
127       if (!operands.empty()) {
128         // If this node has dependencies, check all dependent nodes with respect
129         // to a common post dominator. This ensures that all dependency values
130         // have been computed before allocating the buffer.
131         ValueSetT dependencies(std::next(operands.begin()), operands.end());
132         dependencyBlock = findCommonDominator(*operands.begin(), dependencies,
133                                               postDominators);
134       }
135 
136       // Find the actual placement block and determine the start operation using
137       // an upper placement-block boundary. The idea is that placement block
138       // cannot be moved any further upwards than the given upper bound.
139       Block *placementBlock = findPlacementBlock(
140           state, state.computeUpperBound(dominatorBlock, dependencyBlock));
141       Operation *startOperation = BufferPlacementAllocs::getStartOperation(
142           allocValue, placementBlock, liveness);
143 
144       // Move the alloc in front of the start operation.
145       Operation *allocOperation = allocValue.getDefiningOp();
146       allocOperation->moveBefore(startOperation);
147     }
148   }
149 
150 private:
151   /// Finds a valid placement block by walking upwards in the CFG until we
152   /// either cannot continue our walk due to constraints (given by the StateT
153   /// implementation) or we have reached the upper-most dominator block.
findPlacementBlock(StateT & state,Block * upperBound)154   Block *findPlacementBlock(StateT &state, Block *upperBound) {
155     Block *currentBlock = state.placementBlock;
156     // Walk from the innermost regions/loops to the outermost regions/loops and
157     // find an appropriate placement block that satisfies the constraint of the
158     // current StateT implementation. Walk until we reach the upperBound block
159     // (if any).
160 
161     // If we are not able to find a valid parent operation or an associated
162     // parent block, break the walk loop.
163     Operation *parentOp;
164     Block *parentBlock;
165     while ((parentOp = currentBlock->getParentOp()) &&
166            (parentBlock = parentOp->getBlock()) &&
167            (!upperBound ||
168             dominators.properlyDominates(upperBound, currentBlock))) {
169       // Try to find an immediate dominator and check whether the parent block
170       // is above the immediate dominator (if any).
171       DominanceInfoNode *idom = dominators.getNode(currentBlock)->getIDom();
172       if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) {
173         // If the current immediate dominator is below the placement block, move
174         // to the immediate dominator block.
175         currentBlock = idom->getBlock();
176         state.recordMoveToDominator(currentBlock);
177       } else {
178         // We have to move to our parent block since an immediate dominator does
179         // either not exist or is above our parent block. If we cannot move to
180         // our parent operation due to constraints given by the StateT
181         // implementation, break the walk loop. Furthermore, we should not move
182         // allocations out of unknown region-based control-flow operations.
183         if (!isKnownControlFlowInterface(parentOp) ||
184             !state.isLegalPlacement(parentOp))
185           break;
186         // Move to our parent block by notifying the current StateT
187         // implementation.
188         currentBlock = parentBlock;
189         state.recordMoveToParent(currentBlock);
190       }
191     }
192     // Return the finally determined placement block.
193     return state.placementBlock;
194   }
195 
196   /// The dominator info to find the appropriate start operation to move the
197   /// allocs.
198   DominanceInfo dominators;
199 
200   /// The post dominator info to move the dependent allocs in the right
201   /// position.
202   PostDominanceInfo postDominators;
203 
204   /// The map storing the final placement blocks of a given alloc value.
205   llvm::DenseMap<Value, Block *> placementBlocks;
206 };
207 
208 /// A state implementation compatible with the `BufferAllocationHoisting` class
209 /// that hoists allocations into dominator blocks while keeping them inside of
210 /// loops.
211 struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase {
212   using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
213 
214   /// Computes the upper bound for the placement block search.
computeUpperBound__anon0902a0d40111::BufferAllocationHoistingState215   Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
216     // If we do not have a dependency block, the upper bound is given by the
217     // dominator block.
218     if (!dependencyBlock)
219       return dominatorBlock;
220 
221     // Find the "lower" block of the dominator and the dependency block to
222     // ensure that we do not move allocations above this block.
223     return dominators->properlyDominates(dominatorBlock, dependencyBlock)
224                ? dependencyBlock
225                : dominatorBlock;
226   }
227 
228   /// Returns true if the given operation does not represent a loop.
isLegalPlacement__anon0902a0d40111::BufferAllocationHoistingState229   bool isLegalPlacement(Operation *op) {
230     return !BufferPlacementTransformationBase::isLoop(op);
231   }
232 
233   /// Sets the current placement block to the given block.
recordMoveToDominator__anon0902a0d40111::BufferAllocationHoistingState234   void recordMoveToDominator(Block *block) { placementBlock = block; }
235 
236   /// Sets the current placement block to the given block.
recordMoveToParent__anon0902a0d40111::BufferAllocationHoistingState237   void recordMoveToParent(Block *block) { recordMoveToDominator(block); }
238 };
239 
240 /// A state implementation compatible with the `BufferAllocationHoisting` class
241 /// that hoists allocations out of loops.
242 struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
243   using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
244 
245   /// Remembers the dominator block of all aliases.
246   Block *aliasDominatorBlock;
247 
248   /// Computes the upper bound for the placement block search.
computeUpperBound__anon0902a0d40111::BufferAllocationLoopHoistingState249   Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
250     aliasDominatorBlock = dominatorBlock;
251     // If there is a dependency block, we have to use this block as an upper
252     // bound to satisfy all allocation value dependencies.
253     return dependencyBlock ? dependencyBlock : nullptr;
254   }
255 
256   /// Returns true if the given operation represents a loop and one of the
257   /// aliases caused the `aliasDominatorBlock` to be "above" the block of the
258   /// given loop operation. If this is the case, it indicates that the
259   /// allocation is passed via a back edge.
isLegalPlacement__anon0902a0d40111::BufferAllocationLoopHoistingState260   bool isLegalPlacement(Operation *op) {
261     return BufferPlacementTransformationBase::isLoop(op) &&
262            !dominators->dominates(aliasDominatorBlock, op->getBlock());
263   }
264 
265   /// Does not change the internal placement block, as we want to move
266   /// operations out of loops only.
recordMoveToDominator__anon0902a0d40111::BufferAllocationLoopHoistingState267   void recordMoveToDominator(Block *block) {}
268 
269   /// Sets the current placement block to the given block.
recordMoveToParent__anon0902a0d40111::BufferAllocationLoopHoistingState270   void recordMoveToParent(Block *block) { placementBlock = block; }
271 };
272 
273 //===----------------------------------------------------------------------===//
274 // BufferPlacementPromotion
275 //===----------------------------------------------------------------------===//
276 
277 /// Promotes heap-based allocations to stack-based allocations (if possible).
278 class BufferPlacementPromotion : BufferPlacementTransformationBase {
279 public:
BufferPlacementPromotion(Operation * op)280   BufferPlacementPromotion(Operation *op)
281       : BufferPlacementTransformationBase(op) {}
282 
283   /// Promote buffers to stack-based allocations.
promote(unsigned maximumSize)284   void promote(unsigned maximumSize) {
285     for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
286       Value alloc = std::get<0>(entry);
287       // Checking several requirements to transform an AllocOp into an AllocaOp.
288       // The transformation is done if the allocation is limited to a given
289       // size. Furthermore, a deallocation must not be defined for this
290       // allocation entry and a parent allocation scope must exist.
291       if (!isSmallAlloc(alloc, maximumSize) || std::get<1>(entry) ||
292           !hasAllocationScope(alloc, aliases))
293         continue;
294 
295       Operation *startOperation = BufferPlacementAllocs::getStartOperation(
296           alloc, alloc.getParentBlock(), liveness);
297       // Build a new alloca that is associated with its parent
298       // `AutomaticAllocationScope` determined during the initialization phase.
299       OpBuilder builder(startOperation);
300       auto alloca = builder.create<AllocaOp>(
301           alloc.getLoc(), alloc.getType().cast<MemRefType>());
302 
303       // Replace the original alloc by a newly created alloca.
304       Operation *allocOp = alloc.getDefiningOp();
305       allocOp->replaceAllUsesWith(alloca.getOperation());
306       allocOp->erase();
307     }
308   }
309 };
310 
311 //===----------------------------------------------------------------------===//
312 // BufferOptimizationPasses
313 //===----------------------------------------------------------------------===//
314 
315 /// The buffer hoisting pass that hoists allocation nodes into dominating
316 /// blocks.
317 struct BufferHoistingPass : BufferHoistingBase<BufferHoistingPass> {
318 
runOnFunction__anon0902a0d40111::BufferHoistingPass319   void runOnFunction() override {
320     // Hoist all allocations into dominator blocks.
321     BufferAllocationHoisting<BufferAllocationHoistingState> optimizer(
322         getFunction());
323     optimizer.hoist();
324   }
325 };
326 
327 /// The buffer loop hoisting pass that hoists allocation nodes out of loops.
328 struct BufferLoopHoistingPass : BufferLoopHoistingBase<BufferLoopHoistingPass> {
329 
runOnFunction__anon0902a0d40111::BufferLoopHoistingPass330   void runOnFunction() override {
331     // Hoist all allocations out of loops.
332     BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(
333         getFunction());
334     optimizer.hoist();
335   }
336 };
337 
338 /// The promote buffer to stack pass that tries to convert alloc nodes into
339 /// alloca nodes.
340 struct PromoteBuffersToStackPass
341     : PromoteBuffersToStackBase<PromoteBuffersToStackPass> {
342 
PromoteBuffersToStackPass__anon0902a0d40111::PromoteBuffersToStackPass343   PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes)
344       : maximumSize(maxAllocSizeInBytes) {}
345 
runOnFunction__anon0902a0d40111::PromoteBuffersToStackPass346   void runOnFunction() override {
347     // Move all allocation nodes and convert candidates into allocas.
348     BufferPlacementPromotion optimizer(getFunction());
349     optimizer.promote(maximumSize);
350   }
351 
352 private:
353   const unsigned maximumSize;
354 };
355 
356 } // end anonymous namespace
357 
createBufferHoistingPass()358 std::unique_ptr<Pass> mlir::createBufferHoistingPass() {
359   return std::make_unique<BufferHoistingPass>();
360 }
361 
createBufferLoopHoistingPass()362 std::unique_ptr<Pass> mlir::createBufferLoopHoistingPass() {
363   return std::make_unique<BufferLoopHoistingPass>();
364 }
365 
366 std::unique_ptr<Pass>
createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes)367 mlir::createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes) {
368   return std::make_unique<PromoteBuffersToStackPass>(maxAllocSizeInBytes);
369 }
370