1 //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements logic for three optimization passes. The first two
10 // passes try to move alloc nodes out of blocks to reduce the number of
11 // allocations and copies during buffer deallocation. The third pass tries to
12 // convert heap-based allocations to stack-based allocations, if possible.
13
14 #include "PassDetail.h"
15 #include "mlir/IR/Operation.h"
16 #include "mlir/Interfaces/LoopLikeInterface.h"
17 #include "mlir/Pass/Pass.h"
18 #include "mlir/Transforms/Bufferize.h"
19 #include "mlir/Transforms/Passes.h"
20
21 using namespace mlir;
22
23 /// Returns true if the given operation implements a known high-level region-
24 /// based control-flow interface.
isKnownControlFlowInterface(Operation * op)25 static bool isKnownControlFlowInterface(Operation *op) {
26 return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
27 }
28
29 /// Check if the size of the allocation is less than the given size. The
30 /// transformation is only applied to small buffers since large buffers could
31 /// exceed the stack space.
isSmallAlloc(Value alloc,unsigned maximumSizeInBytes)32 static bool isSmallAlloc(Value alloc, unsigned maximumSizeInBytes) {
33 auto type = alloc.getType().dyn_cast<ShapedType>();
34 if (!type || !type.hasStaticShape())
35 return false;
36 return type.getSizeInBits() < maximumSizeInBytes * 8;
37 }
38
39 /// Checks whether the given aliases leave the allocation scope.
40 static bool
leavesAllocationScope(Region * parentRegion,const BufferAliasAnalysis::ValueSetT & aliases)41 leavesAllocationScope(Region *parentRegion,
42 const BufferAliasAnalysis::ValueSetT &aliases) {
43 for (Value alias : aliases) {
44 for (auto *use : alias.getUsers()) {
45 // If there is at least one alias that leaves the parent region, we know
46 // that this alias escapes the whole region and hence the associated
47 // allocation leaves allocation scope.
48 if (use->hasTrait<OpTrait::ReturnLike>() &&
49 use->getParentRegion() == parentRegion)
50 return true;
51 }
52 }
53 return false;
54 }
55
56 /// Checks, if an automated allocation scope for a given alloc value exists.
hasAllocationScope(Value alloc,const BufferAliasAnalysis & aliasAnalysis)57 static bool hasAllocationScope(Value alloc,
58 const BufferAliasAnalysis &aliasAnalysis) {
59 Region *region = alloc.getParentRegion();
60 do {
61 if (Operation *parentOp = region->getParentOp()) {
62 // Check if the operation is an automatic allocation scope and whether an
63 // alias leaves the scope. This means, an allocation yields out of
64 // this scope and can not be transformed in a stack-based allocation.
65 if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
66 !leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
67 return true;
68 // Check if the operation is a known control flow interface and break the
69 // loop to avoid transformation in loops. Furthermore skip transformation
70 // if the operation does not implement a RegionBeanchOpInterface.
71 if (BufferPlacementTransformationBase::isLoop(parentOp) ||
72 !isKnownControlFlowInterface(parentOp))
73 break;
74 }
75 } while ((region = region->getParentRegion()));
76 return false;
77 }
78
79 namespace {
80
81 //===----------------------------------------------------------------------===//
82 // BufferAllocationHoisting
83 //===----------------------------------------------------------------------===//
84
85 /// A base implementation compatible with the `BufferAllocationHoisting` class.
86 struct BufferAllocationHoistingStateBase {
87 /// A pointer to the current dominance info.
88 DominanceInfo *dominators;
89
90 /// The current allocation value.
91 Value allocValue;
92
93 /// The current placement block (if any).
94 Block *placementBlock;
95
96 /// Initializes the state base.
BufferAllocationHoistingStateBase__anon0902a0d40111::BufferAllocationHoistingStateBase97 BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue,
98 Block *placementBlock)
99 : dominators(dominators), allocValue(allocValue),
100 placementBlock(placementBlock) {}
101 };
102
103 /// Implements the actual hoisting logic for allocation nodes.
104 template <typename StateT>
105 class BufferAllocationHoisting : public BufferPlacementTransformationBase {
106 public:
BufferAllocationHoisting(Operation * op)107 BufferAllocationHoisting(Operation *op)
108 : BufferPlacementTransformationBase(op), dominators(op),
109 postDominators(op) {}
110
111 /// Moves allocations upwards.
hoist()112 void hoist() {
113 for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
114 Value allocValue = std::get<0>(entry);
115 Operation *definingOp = allocValue.getDefiningOp();
116 assert(definingOp && "No defining op");
117 auto operands = definingOp->getOperands();
118 auto resultAliases = aliases.resolve(allocValue);
119 // Determine the common dominator block of all aliases.
120 Block *dominatorBlock =
121 findCommonDominator(allocValue, resultAliases, dominators);
122 // Init the initial hoisting state.
123 StateT state(&dominators, allocValue, allocValue.getParentBlock());
124 // Check for additional allocation dependencies to compute an upper bound
125 // for hoisting.
126 Block *dependencyBlock = nullptr;
127 if (!operands.empty()) {
128 // If this node has dependencies, check all dependent nodes with respect
129 // to a common post dominator. This ensures that all dependency values
130 // have been computed before allocating the buffer.
131 ValueSetT dependencies(std::next(operands.begin()), operands.end());
132 dependencyBlock = findCommonDominator(*operands.begin(), dependencies,
133 postDominators);
134 }
135
136 // Find the actual placement block and determine the start operation using
137 // an upper placement-block boundary. The idea is that placement block
138 // cannot be moved any further upwards than the given upper bound.
139 Block *placementBlock = findPlacementBlock(
140 state, state.computeUpperBound(dominatorBlock, dependencyBlock));
141 Operation *startOperation = BufferPlacementAllocs::getStartOperation(
142 allocValue, placementBlock, liveness);
143
144 // Move the alloc in front of the start operation.
145 Operation *allocOperation = allocValue.getDefiningOp();
146 allocOperation->moveBefore(startOperation);
147 }
148 }
149
150 private:
151 /// Finds a valid placement block by walking upwards in the CFG until we
152 /// either cannot continue our walk due to constraints (given by the StateT
153 /// implementation) or we have reached the upper-most dominator block.
findPlacementBlock(StateT & state,Block * upperBound)154 Block *findPlacementBlock(StateT &state, Block *upperBound) {
155 Block *currentBlock = state.placementBlock;
156 // Walk from the innermost regions/loops to the outermost regions/loops and
157 // find an appropriate placement block that satisfies the constraint of the
158 // current StateT implementation. Walk until we reach the upperBound block
159 // (if any).
160
161 // If we are not able to find a valid parent operation or an associated
162 // parent block, break the walk loop.
163 Operation *parentOp;
164 Block *parentBlock;
165 while ((parentOp = currentBlock->getParentOp()) &&
166 (parentBlock = parentOp->getBlock()) &&
167 (!upperBound ||
168 dominators.properlyDominates(upperBound, currentBlock))) {
169 // Try to find an immediate dominator and check whether the parent block
170 // is above the immediate dominator (if any).
171 DominanceInfoNode *idom = dominators.getNode(currentBlock)->getIDom();
172 if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) {
173 // If the current immediate dominator is below the placement block, move
174 // to the immediate dominator block.
175 currentBlock = idom->getBlock();
176 state.recordMoveToDominator(currentBlock);
177 } else {
178 // We have to move to our parent block since an immediate dominator does
179 // either not exist or is above our parent block. If we cannot move to
180 // our parent operation due to constraints given by the StateT
181 // implementation, break the walk loop. Furthermore, we should not move
182 // allocations out of unknown region-based control-flow operations.
183 if (!isKnownControlFlowInterface(parentOp) ||
184 !state.isLegalPlacement(parentOp))
185 break;
186 // Move to our parent block by notifying the current StateT
187 // implementation.
188 currentBlock = parentBlock;
189 state.recordMoveToParent(currentBlock);
190 }
191 }
192 // Return the finally determined placement block.
193 return state.placementBlock;
194 }
195
196 /// The dominator info to find the appropriate start operation to move the
197 /// allocs.
198 DominanceInfo dominators;
199
200 /// The post dominator info to move the dependent allocs in the right
201 /// position.
202 PostDominanceInfo postDominators;
203
204 /// The map storing the final placement blocks of a given alloc value.
205 llvm::DenseMap<Value, Block *> placementBlocks;
206 };
207
208 /// A state implementation compatible with the `BufferAllocationHoisting` class
209 /// that hoists allocations into dominator blocks while keeping them inside of
210 /// loops.
211 struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase {
212 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
213
214 /// Computes the upper bound for the placement block search.
computeUpperBound__anon0902a0d40111::BufferAllocationHoistingState215 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
216 // If we do not have a dependency block, the upper bound is given by the
217 // dominator block.
218 if (!dependencyBlock)
219 return dominatorBlock;
220
221 // Find the "lower" block of the dominator and the dependency block to
222 // ensure that we do not move allocations above this block.
223 return dominators->properlyDominates(dominatorBlock, dependencyBlock)
224 ? dependencyBlock
225 : dominatorBlock;
226 }
227
228 /// Returns true if the given operation does not represent a loop.
isLegalPlacement__anon0902a0d40111::BufferAllocationHoistingState229 bool isLegalPlacement(Operation *op) {
230 return !BufferPlacementTransformationBase::isLoop(op);
231 }
232
233 /// Sets the current placement block to the given block.
recordMoveToDominator__anon0902a0d40111::BufferAllocationHoistingState234 void recordMoveToDominator(Block *block) { placementBlock = block; }
235
236 /// Sets the current placement block to the given block.
recordMoveToParent__anon0902a0d40111::BufferAllocationHoistingState237 void recordMoveToParent(Block *block) { recordMoveToDominator(block); }
238 };
239
240 /// A state implementation compatible with the `BufferAllocationHoisting` class
241 /// that hoists allocations out of loops.
242 struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
243 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
244
245 /// Remembers the dominator block of all aliases.
246 Block *aliasDominatorBlock;
247
248 /// Computes the upper bound for the placement block search.
computeUpperBound__anon0902a0d40111::BufferAllocationLoopHoistingState249 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
250 aliasDominatorBlock = dominatorBlock;
251 // If there is a dependency block, we have to use this block as an upper
252 // bound to satisfy all allocation value dependencies.
253 return dependencyBlock ? dependencyBlock : nullptr;
254 }
255
256 /// Returns true if the given operation represents a loop and one of the
257 /// aliases caused the `aliasDominatorBlock` to be "above" the block of the
258 /// given loop operation. If this is the case, it indicates that the
259 /// allocation is passed via a back edge.
isLegalPlacement__anon0902a0d40111::BufferAllocationLoopHoistingState260 bool isLegalPlacement(Operation *op) {
261 return BufferPlacementTransformationBase::isLoop(op) &&
262 !dominators->dominates(aliasDominatorBlock, op->getBlock());
263 }
264
265 /// Does not change the internal placement block, as we want to move
266 /// operations out of loops only.
recordMoveToDominator__anon0902a0d40111::BufferAllocationLoopHoistingState267 void recordMoveToDominator(Block *block) {}
268
269 /// Sets the current placement block to the given block.
recordMoveToParent__anon0902a0d40111::BufferAllocationLoopHoistingState270 void recordMoveToParent(Block *block) { placementBlock = block; }
271 };
272
273 //===----------------------------------------------------------------------===//
274 // BufferPlacementPromotion
275 //===----------------------------------------------------------------------===//
276
277 /// Promotes heap-based allocations to stack-based allocations (if possible).
278 class BufferPlacementPromotion : BufferPlacementTransformationBase {
279 public:
BufferPlacementPromotion(Operation * op)280 BufferPlacementPromotion(Operation *op)
281 : BufferPlacementTransformationBase(op) {}
282
283 /// Promote buffers to stack-based allocations.
promote(unsigned maximumSize)284 void promote(unsigned maximumSize) {
285 for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
286 Value alloc = std::get<0>(entry);
287 // Checking several requirements to transform an AllocOp into an AllocaOp.
288 // The transformation is done if the allocation is limited to a given
289 // size. Furthermore, a deallocation must not be defined for this
290 // allocation entry and a parent allocation scope must exist.
291 if (!isSmallAlloc(alloc, maximumSize) || std::get<1>(entry) ||
292 !hasAllocationScope(alloc, aliases))
293 continue;
294
295 Operation *startOperation = BufferPlacementAllocs::getStartOperation(
296 alloc, alloc.getParentBlock(), liveness);
297 // Build a new alloca that is associated with its parent
298 // `AutomaticAllocationScope` determined during the initialization phase.
299 OpBuilder builder(startOperation);
300 auto alloca = builder.create<AllocaOp>(
301 alloc.getLoc(), alloc.getType().cast<MemRefType>());
302
303 // Replace the original alloc by a newly created alloca.
304 Operation *allocOp = alloc.getDefiningOp();
305 allocOp->replaceAllUsesWith(alloca.getOperation());
306 allocOp->erase();
307 }
308 }
309 };
310
311 //===----------------------------------------------------------------------===//
312 // BufferOptimizationPasses
313 //===----------------------------------------------------------------------===//
314
315 /// The buffer hoisting pass that hoists allocation nodes into dominating
316 /// blocks.
317 struct BufferHoistingPass : BufferHoistingBase<BufferHoistingPass> {
318
runOnFunction__anon0902a0d40111::BufferHoistingPass319 void runOnFunction() override {
320 // Hoist all allocations into dominator blocks.
321 BufferAllocationHoisting<BufferAllocationHoistingState> optimizer(
322 getFunction());
323 optimizer.hoist();
324 }
325 };
326
327 /// The buffer loop hoisting pass that hoists allocation nodes out of loops.
328 struct BufferLoopHoistingPass : BufferLoopHoistingBase<BufferLoopHoistingPass> {
329
runOnFunction__anon0902a0d40111::BufferLoopHoistingPass330 void runOnFunction() override {
331 // Hoist all allocations out of loops.
332 BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(
333 getFunction());
334 optimizer.hoist();
335 }
336 };
337
338 /// The promote buffer to stack pass that tries to convert alloc nodes into
339 /// alloca nodes.
340 struct PromoteBuffersToStackPass
341 : PromoteBuffersToStackBase<PromoteBuffersToStackPass> {
342
PromoteBuffersToStackPass__anon0902a0d40111::PromoteBuffersToStackPass343 PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes)
344 : maximumSize(maxAllocSizeInBytes) {}
345
runOnFunction__anon0902a0d40111::PromoteBuffersToStackPass346 void runOnFunction() override {
347 // Move all allocation nodes and convert candidates into allocas.
348 BufferPlacementPromotion optimizer(getFunction());
349 optimizer.promote(maximumSize);
350 }
351
352 private:
353 const unsigned maximumSize;
354 };
355
356 } // end anonymous namespace
357
createBufferHoistingPass()358 std::unique_ptr<Pass> mlir::createBufferHoistingPass() {
359 return std::make_unique<BufferHoistingPass>();
360 }
361
createBufferLoopHoistingPass()362 std::unique_ptr<Pass> mlir::createBufferLoopHoistingPass() {
363 return std::make_unique<BufferLoopHoistingPass>();
364 }
365
366 std::unique_ptr<Pass>
createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes)367 mlir::createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes) {
368 return std::make_unique<PromoteBuffersToStackPass>(maxAllocSizeInBytes);
369 }
370