1 //===- BufferDeallocation.cpp - the impl for buffer deallocation ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements logic for computing correct alloc and dealloc positions.
10 // Furthermore, buffer placement also adds required new alloc and copy
11 // operations to ensure that all buffers are deallocated. The main class is the
12 // BufferDeallocationPass class that implements the underlying algorithm. In
13 // order to put allocations and deallocations at safe positions, it is
14 // significantly important to put them into the correct blocks. However, the
15 // liveness analysis does not pay attention to aliases, which can occur due to
16 // branches (and their associated block arguments) in general. For this purpose,
17 // BufferDeallocation firstly finds all possible aliases for a single value
18 // (using the BufferAliasAnalysis class). Consider the following
19 // example:
20 //
21 // ^bb0(%arg0):
22 // cond_br %cond, ^bb1, ^bb2
23 // ^bb1:
24 // br ^exit(%arg0)
25 // ^bb2:
26 // %new_value = ...
27 // br ^exit(%new_value)
28 // ^exit(%arg1):
29 // return %arg1;
30 //
31 // We should place the dealloc for %new_value in exit. However, we have to free
32 // the buffer in the same block, because it cannot be freed in the post
33 // dominator. However, this requires a new copy buffer for %arg1 that will
34 // contain the actual contents. Using the class BufferAliasAnalysis, we
35 // will find out that %new_value has a potential alias %arg1. In order to find
36 // the dealloc position we have to find all potential aliases, iterate over
37 // their uses and find the common post-dominator block (note that additional
38 // copies and buffers remove potential aliases and will influence the placement
39 // of the deallocs). In all cases, the computed block can be safely used to free
40 // the %new_value buffer (may be exit or bb2) as it will die and we can use
41 // liveness information to determine the exact operation after which we have to
42 // insert the dealloc. However, the algorithm supports introducing copy buffers
43 // and placing deallocs in safe locations to ensure that all buffers will be
44 // freed in the end.
45 //
46 // TODO:
47 // The current implementation does not support explicit-control-flow loops and
48 // the resulting code will be invalid with respect to program semantics.
49 // However, structured control-flow loops are fully supported. Furthermore, it
50 // doesn't accept functions which return buffers already.
51 //
52 //===----------------------------------------------------------------------===//
53
54 #include "PassDetail.h"
55 #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
56 #include "mlir/Dialect/StandardOps/IR/Ops.h"
57 #include "mlir/IR/Operation.h"
58 #include "mlir/Interfaces/ControlFlowInterfaces.h"
59 #include "mlir/Interfaces/LoopLikeInterface.h"
60 #include "mlir/Pass/Pass.h"
61 #include "mlir/Transforms/Bufferize.h"
62 #include "mlir/Transforms/Passes.h"
63 #include "llvm/ADT/SetOperations.h"
64
65 using namespace mlir;
66
67 /// Walks over all immediate return-like terminators in the given region.
68 template <typename FuncT>
walkReturnOperations(Region * region,const FuncT & func)69 static void walkReturnOperations(Region *region, const FuncT &func) {
70 for (Block &block : *region)
71 for (Operation &operation : block) {
72 // Skip non-return-like terminators.
73 if (operation.hasTrait<OpTrait::ReturnLike>())
74 func(&operation);
75 }
76 }
77
78 /// Wrapper for the actual `RegionBranchOpInterface.getSuccessorRegions`
79 /// function that initializes the required `operandAttributes` array.
getSuccessorRegions(RegionBranchOpInterface regionInterface,llvm::Optional<unsigned> index,SmallVectorImpl<RegionSuccessor> & successors)80 static void getSuccessorRegions(RegionBranchOpInterface regionInterface,
81 llvm::Optional<unsigned> index,
82 SmallVectorImpl<RegionSuccessor> &successors) {
83 // Create a list of null attributes for each operand to comply with the
84 // `getSuccessorRegions` interface definition that requires a single
85 // attribute per operand.
86 SmallVector<Attribute, 2> operandAttributes(
87 regionInterface.getOperation()->getNumOperands());
88
89 // Get all successor regions using the temporarily allocated
90 // `operandAttributes`.
91 regionInterface.getSuccessorRegions(index, operandAttributes, successors);
92 }
93
94 //===----------------------------------------------------------------------===//
95 // BufferPlacementAllocs
96 //===----------------------------------------------------------------------===//
97
98 /// Get the start operation to place the given alloc value withing the
99 // specified placement block.
getStartOperation(Value allocValue,Block * placementBlock,const Liveness & liveness)100 Operation *BufferPlacementAllocs::getStartOperation(Value allocValue,
101 Block *placementBlock,
102 const Liveness &liveness) {
103 // We have to ensure that we place the alloc before its first use in this
104 // block.
105 const LivenessBlockInfo &livenessInfo = *liveness.getLiveness(placementBlock);
106 Operation *startOperation = livenessInfo.getStartOperation(allocValue);
107 // Check whether the start operation lies in the desired placement block.
108 // If not, we will use the terminator as this is the last operation in
109 // this block.
110 if (startOperation->getBlock() != placementBlock) {
111 Operation *opInPlacementBlock =
112 placementBlock->findAncestorOpInBlock(*startOperation);
113 startOperation = opInPlacementBlock ? opInPlacementBlock
114 : placementBlock->getTerminator();
115 }
116
117 return startOperation;
118 }
119
120 /// Finds associated deallocs that can be linked to our allocation nodes (if
121 /// any).
findDealloc(Value allocValue)122 Operation *BufferPlacementAllocs::findDealloc(Value allocValue) {
123 auto userIt = llvm::find_if(allocValue.getUsers(), [&](Operation *user) {
124 auto effectInterface = dyn_cast<MemoryEffectOpInterface>(user);
125 if (!effectInterface)
126 return false;
127 // Try to find a free effect that is applied to one of our values
128 // that will be automatically freed by our pass.
129 SmallVector<MemoryEffects::EffectInstance, 2> effects;
130 effectInterface.getEffectsOnValue(allocValue, effects);
131 return llvm::any_of(effects, [&](MemoryEffects::EffectInstance &it) {
132 return isa<MemoryEffects::Free>(it.getEffect());
133 });
134 });
135 // Assign the associated dealloc operation (if any).
136 return userIt != allocValue.user_end() ? *userIt : nullptr;
137 }
138
139 /// Initializes the internal list by discovering all supported allocation
140 /// nodes.
BufferPlacementAllocs(Operation * op)141 BufferPlacementAllocs::BufferPlacementAllocs(Operation *op) { build(op); }
142
143 /// Searches for and registers all supported allocation entries.
build(Operation * op)144 void BufferPlacementAllocs::build(Operation *op) {
145 op->walk([&](MemoryEffectOpInterface opInterface) {
146 // Try to find a single allocation result.
147 SmallVector<MemoryEffects::EffectInstance, 2> effects;
148 opInterface.getEffects(effects);
149
150 SmallVector<MemoryEffects::EffectInstance, 2> allocateResultEffects;
151 llvm::copy_if(
152 effects, std::back_inserter(allocateResultEffects),
153 [=](MemoryEffects::EffectInstance &it) {
154 Value value = it.getValue();
155 return isa<MemoryEffects::Allocate>(it.getEffect()) && value &&
156 value.isa<OpResult>() &&
157 it.getResource() !=
158 SideEffects::AutomaticAllocationScopeResource::get();
159 });
160 // If there is one result only, we will be able to move the allocation and
161 // (possibly existing) deallocation ops.
162 if (allocateResultEffects.size() != 1)
163 return;
164 // Get allocation result.
165 Value allocValue = allocateResultEffects[0].getValue();
166 // Find the associated dealloc value and register the allocation entry.
167 allocs.push_back(std::make_tuple(allocValue, findDealloc(allocValue)));
168 });
169 }
170
171 //===----------------------------------------------------------------------===//
172 // BufferPlacementTransformationBase
173 //===----------------------------------------------------------------------===//
174
175 /// Constructs a new transformation base using the given root operation.
BufferPlacementTransformationBase(Operation * op)176 BufferPlacementTransformationBase::BufferPlacementTransformationBase(
177 Operation *op)
178 : aliases(op), allocs(op), liveness(op) {}
179
180 /// Returns true if the given operation represents a loop by testing whether it
181 /// implements the `LoopLikeOpInterface` or the `RegionBranchOpInterface`. In
182 /// the case of a `RegionBranchOpInterface`, it checks all region-based control-
183 /// flow edges for cycles.
isLoop(Operation * op)184 bool BufferPlacementTransformationBase::isLoop(Operation *op) {
185 // If the operation implements the `LoopLikeOpInterface` it can be considered
186 // a loop.
187 if (isa<LoopLikeOpInterface>(op))
188 return true;
189
190 // If the operation does not implement the `RegionBranchOpInterface`, it is
191 // (currently) not possible to detect a loop.
192 RegionBranchOpInterface regionInterface;
193 if (!(regionInterface = dyn_cast<RegionBranchOpInterface>(op)))
194 return false;
195
196 // Recurses into a region using the current region interface to find potential
197 // cycles.
198 SmallPtrSet<Region *, 4> visitedRegions;
199 std::function<bool(Region *)> recurse = [&](Region *current) {
200 if (!current)
201 return false;
202 // If we have found a back edge, the parent operation induces a loop.
203 if (!visitedRegions.insert(current).second)
204 return true;
205 // Recurses into all region successors.
206 SmallVector<RegionSuccessor, 2> successors;
207 getSuccessorRegions(regionInterface, current->getRegionNumber(),
208 successors);
209 for (RegionSuccessor ®ionEntry : successors)
210 if (recurse(regionEntry.getSuccessor()))
211 return true;
212 return false;
213 };
214
215 // Start with all entry regions and test whether they induce a loop.
216 SmallVector<RegionSuccessor, 2> successorRegions;
217 getSuccessorRegions(regionInterface, /*index=*/llvm::None, successorRegions);
218 for (RegionSuccessor ®ionEntry : successorRegions) {
219 if (recurse(regionEntry.getSuccessor()))
220 return true;
221 visitedRegions.clear();
222 }
223
224 return false;
225 }
226
227 namespace {
228
229 //===----------------------------------------------------------------------===//
230 // Backedges analysis
231 //===----------------------------------------------------------------------===//
232
233 /// A straight-forward program analysis which detects loop backedges induced by
234 /// explicit control flow.
235 class Backedges {
236 public:
237 using BlockSetT = SmallPtrSet<Block *, 16>;
238 using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;
239
240 public:
241 /// Constructs a new backedges analysis using the op provided.
Backedges(Operation * op)242 Backedges(Operation *op) { recurse(op, op->getBlock()); }
243
244 /// Returns the number of backedges formed by explicit control flow.
size() const245 size_t size() const { return edgeSet.size(); }
246
247 /// Returns the start iterator to loop over all backedges.
begin() const248 BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }
249
250 /// Returns the end iterator to loop over all backedges.
end() const251 BackedgeSetT::const_iterator end() const { return edgeSet.end(); }
252
253 private:
254 /// Enters the current block and inserts a backedge into the `edgeSet` if we
255 /// have already visited the current block. The inserted edge links the given
256 /// `predecessor` with the `current` block.
enter(Block & current,Block * predecessor)257 bool enter(Block ¤t, Block *predecessor) {
258 bool inserted = visited.insert(¤t).second;
259 if (!inserted)
260 edgeSet.insert(std::make_pair(predecessor, ¤t));
261 return inserted;
262 }
263
264 /// Leaves the current block.
exit(Block & current)265 void exit(Block ¤t) { visited.erase(¤t); }
266
267 /// Recurses into the given operation while taking all attached regions into
268 /// account.
recurse(Operation * op,Block * predecessor)269 void recurse(Operation *op, Block *predecessor) {
270 Block *current = op->getBlock();
271 // If the current op implements the `BranchOpInterface`, there can be
272 // cycles in the scope of all successor blocks.
273 if (isa<BranchOpInterface>(op)) {
274 for (Block *succ : current->getSuccessors())
275 recurse(*succ, current);
276 }
277 // Recurse into all distinct regions and check for explicit control-flow
278 // loops.
279 for (Region ®ion : op->getRegions())
280 recurse(region.front(), current);
281 }
282
283 /// Recurses into explicit control-flow structures that are given by
284 /// the successor relation defined on the block level.
recurse(Block & block,Block * predecessor)285 void recurse(Block &block, Block *predecessor) {
286 // Try to enter the current block. If this is not possible, we are
287 // currently processing this block and can safely return here.
288 if (!enter(block, predecessor))
289 return;
290
291 // Recurse into all operations and successor blocks.
292 for (Operation &op : block.getOperations())
293 recurse(&op, predecessor);
294
295 // Leave the current block.
296 exit(block);
297 }
298
299 /// Stores all blocks that are currently visited and on the processing stack.
300 BlockSetT visited;
301
302 /// Stores all backedges in the format (source, target).
303 BackedgeSetT edgeSet;
304 };
305
306 //===----------------------------------------------------------------------===//
307 // BufferDeallocation
308 //===----------------------------------------------------------------------===//
309
310 /// The buffer deallocation transformation which ensures that all allocs in the
311 /// program have a corresponding de-allocation. As a side-effect, it might also
312 /// introduce copies that in turn leads to additional allocs and de-allocations.
313 class BufferDeallocation : BufferPlacementTransformationBase {
314 public:
BufferDeallocation(Operation * op)315 BufferDeallocation(Operation *op)
316 : BufferPlacementTransformationBase(op), dominators(op),
317 postDominators(op) {}
318
319 /// Performs the actual placement/creation of all temporary alloc, copy and
320 /// dealloc nodes.
deallocate()321 void deallocate() {
322 // Add additional allocations and copies that are required.
323 introduceCopies();
324 // Place deallocations for all allocation entries.
325 placeDeallocs();
326 }
327
328 private:
329 /// Introduces required allocs and copy operations to avoid memory leaks.
introduceCopies()330 void introduceCopies() {
331 // Initialize the set of values that require a dedicated memory free
332 // operation since their operands cannot be safely deallocated in a post
333 // dominator.
334 SmallPtrSet<Value, 8> valuesToFree;
335 llvm::SmallDenseSet<std::tuple<Value, Block *>> visitedValues;
336 SmallVector<std::tuple<Value, Block *>, 8> toProcess;
337
338 // Check dominance relation for proper dominance properties. If the given
339 // value node does not dominate an alias, we will have to create a copy in
340 // order to free all buffers that can potentially leak into a post
341 // dominator.
342 auto findUnsafeValues = [&](Value source, Block *definingBlock) {
343 auto it = aliases.find(source);
344 if (it == aliases.end())
345 return;
346 for (Value value : it->second) {
347 if (valuesToFree.count(value) > 0)
348 continue;
349 Block *parentBlock = value.getParentBlock();
350 // Check whether we have to free this particular block argument or
351 // generic value. We have to free the current alias if it is either
352 // defined in a non-dominated block or it is defined in the same block
353 // but the current value is not dominated by the source value.
354 if (!dominators.dominates(definingBlock, parentBlock) ||
355 (definingBlock == parentBlock && value.isa<BlockArgument>())) {
356 toProcess.emplace_back(value, parentBlock);
357 valuesToFree.insert(value);
358 } else if (visitedValues.insert(std::make_tuple(value, definingBlock))
359 .second)
360 toProcess.emplace_back(value, definingBlock);
361 }
362 };
363
364 // Detect possibly unsafe aliases starting from all allocations.
365 for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
366 Value allocValue = std::get<0>(entry);
367 findUnsafeValues(allocValue, allocValue.getDefiningOp()->getBlock());
368 }
369 // Try to find block arguments that require an explicit free operation
370 // until we reach a fix point.
371 while (!toProcess.empty()) {
372 auto current = toProcess.pop_back_val();
373 findUnsafeValues(std::get<0>(current), std::get<1>(current));
374 }
375
376 // Update buffer aliases to ensure that we free all buffers and block
377 // arguments at the correct locations.
378 aliases.remove(valuesToFree);
379
380 // Add new allocs and additional copy operations.
381 for (Value value : valuesToFree) {
382 if (auto blockArg = value.dyn_cast<BlockArgument>())
383 introduceBlockArgCopy(blockArg);
384 else
385 introduceValueCopyForRegionResult(value);
386
387 // Register the value to require a final dealloc. Note that we do not have
388 // to assign a block here since we do not want to move the allocation node
389 // to another location.
390 allocs.registerAlloc(std::make_tuple(value, nullptr));
391 }
392 }
393
394 /// Introduces temporary allocs in all predecessors and copies the source
395 /// values into the newly allocated buffers.
introduceBlockArgCopy(BlockArgument blockArg)396 void introduceBlockArgCopy(BlockArgument blockArg) {
397 // Allocate a buffer for the current block argument in the block of
398 // the associated value (which will be a predecessor block by
399 // definition).
400 Block *block = blockArg.getOwner();
401 for (auto it = block->pred_begin(), e = block->pred_end(); it != e; ++it) {
402 // Get the terminator and the value that will be passed to our
403 // argument.
404 Operation *terminator = (*it)->getTerminator();
405 auto branchInterface = cast<BranchOpInterface>(terminator);
406 // Query the associated source value.
407 Value sourceValue =
408 branchInterface.getSuccessorOperands(it.getSuccessorIndex())
409 .getValue()[blockArg.getArgNumber()];
410 // Create a new alloc and copy at the current location of the terminator.
411 Value alloc = introduceBufferCopy(sourceValue, terminator);
412 // Wire new alloc and successor operand.
413 auto mutableOperands =
414 branchInterface.getMutableSuccessorOperands(it.getSuccessorIndex());
415 if (!mutableOperands.hasValue())
416 terminator->emitError() << "terminators with immutable successor "
417 "operands are not supported";
418 else
419 mutableOperands.getValue()
420 .slice(blockArg.getArgNumber(), 1)
421 .assign(alloc);
422 }
423
424 // Check whether the block argument has implicitly defined predecessors via
425 // the RegionBranchOpInterface. This can be the case if the current block
426 // argument belongs to the first block in a region and the parent operation
427 // implements the RegionBranchOpInterface.
428 Region *argRegion = block->getParent();
429 Operation *parentOp = argRegion->getParentOp();
430 RegionBranchOpInterface regionInterface;
431 if (!argRegion || &argRegion->front() != block ||
432 !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
433 return;
434
435 introduceCopiesForRegionSuccessors(
436 regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
437 [&](RegionSuccessor &successorRegion) {
438 // Find a predecessor of our argRegion.
439 return successorRegion.getSuccessor() == argRegion;
440 });
441
442 // Check whether the block argument belongs to an entry region of the
443 // parent operation. In this case, we have to introduce an additional copy
444 // for buffer that is passed to the argument.
445 SmallVector<RegionSuccessor, 2> successorRegions;
446 getSuccessorRegions(regionInterface, /*index=*/llvm::None,
447 successorRegions);
448 auto *it =
449 llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) {
450 return successorRegion.getSuccessor() == argRegion;
451 });
452 if (it == successorRegions.end())
453 return;
454
455 // Determine the actual operand to introduce a copy for and rewire the
456 // operand to point to the copy instead.
457 Value operand =
458 regionInterface.getSuccessorEntryOperands(argRegion->getRegionNumber())
459 [llvm::find(it->getSuccessorInputs(), blockArg).getIndex()];
460 Value copy = introduceBufferCopy(operand, parentOp);
461
462 auto op = llvm::find(parentOp->getOperands(), operand);
463 assert(op != parentOp->getOperands().end() &&
464 "parentOp does not contain operand");
465 parentOp->setOperand(op.getIndex(), copy);
466 }
467
468 /// Introduces temporary allocs in front of all associated nested-region
469 /// terminators and copies the source values into the newly allocated buffers.
introduceValueCopyForRegionResult(Value value)470 void introduceValueCopyForRegionResult(Value value) {
471 // Get the actual result index in the scope of the parent terminator.
472 Operation *operation = value.getDefiningOp();
473 auto regionInterface = cast<RegionBranchOpInterface>(operation);
474 // Filter successors that return to the parent operation.
475 auto regionPredicate = [&](RegionSuccessor &successorRegion) {
476 // If the RegionSuccessor has no associated successor, it will return to
477 // its parent operation.
478 return !successorRegion.getSuccessor();
479 };
480 // Introduce a copy for all region "results" that are returned to the parent
481 // operation. This is required since the parent's result value has been
482 // considered critical. Therefore, the algorithm assumes that a copy of a
483 // previously allocated buffer is returned by the operation (like in the
484 // case of a block argument).
485 introduceCopiesForRegionSuccessors(regionInterface, operation->getRegions(),
486 value, regionPredicate);
487 }
488
489 /// Introduces buffer copies for all terminators in the given regions. The
490 /// regionPredicate is applied to every successor region in order to restrict
491 /// the copies to specific regions.
492 template <typename TPredicate>
introduceCopiesForRegionSuccessors(RegionBranchOpInterface regionInterface,MutableArrayRef<Region> regions,Value argValue,const TPredicate & regionPredicate)493 void introduceCopiesForRegionSuccessors(
494 RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
495 Value argValue, const TPredicate ®ionPredicate) {
496 for (Region ®ion : regions) {
497 // Query the regionInterface to get all successor regions of the current
498 // one.
499 SmallVector<RegionSuccessor, 2> successorRegions;
500 getSuccessorRegions(regionInterface, region.getRegionNumber(),
501 successorRegions);
502 // Try to find a matching region successor.
503 RegionSuccessor *regionSuccessor =
504 llvm::find_if(successorRegions, regionPredicate);
505 if (regionSuccessor == successorRegions.end())
506 continue;
507 // Get the operand index in the context of the current successor input
508 // bindings.
509 size_t operandIndex =
510 llvm::find(regionSuccessor->getSuccessorInputs(), argValue)
511 .getIndex();
512
513 // Iterate over all immediate terminator operations to introduce
514 // new buffer allocations. Thereby, the appropriate terminator operand
515 // will be adjusted to point to the newly allocated buffer instead.
516 walkReturnOperations(®ion, [&](Operation *terminator) {
517 // Extract the source value from the current terminator.
518 Value sourceValue = terminator->getOperand(operandIndex);
519 // Create a new alloc at the current location of the terminator.
520 Value alloc = introduceBufferCopy(sourceValue, terminator);
521 // Wire alloc and terminator operand.
522 terminator->setOperand(operandIndex, alloc);
523 });
524 }
525 }
526
527 /// Creates a new memory allocation for the given source value and copies
528 /// its content into the newly allocated buffer. The terminator operation is
529 /// used to insert the alloc and copy operations at the right places.
introduceBufferCopy(Value sourceValue,Operation * terminator)530 Value introduceBufferCopy(Value sourceValue, Operation *terminator) {
531 // Avoid multiple copies of the same source value. This can happen in the
532 // presence of loops when a branch acts as a backedge while also having
533 // another successor that returns to its parent operation. Note: that
534 // copying copied buffers can introduce memory leaks since the invariant of
535 // BufferPlacement assumes that a buffer will be only copied once into a
536 // temporary buffer. Hence, the construction of copy chains introduces
537 // additional allocations that are not tracked automatically by the
538 // algorithm.
539 if (copiedValues.contains(sourceValue))
540 return sourceValue;
541 // Create a new alloc at the current location of the terminator.
542 auto memRefType = sourceValue.getType().cast<MemRefType>();
543 OpBuilder builder(terminator);
544
545 // Extract information about dynamically shaped types by
546 // extracting their dynamic dimensions.
547 SmallVector<Value, 4> dynamicOperands;
548 for (auto shapeElement : llvm::enumerate(memRefType.getShape())) {
549 if (!ShapedType::isDynamic(shapeElement.value()))
550 continue;
551 dynamicOperands.push_back(builder.create<DimOp>(
552 terminator->getLoc(), sourceValue, shapeElement.index()));
553 }
554
555 // TODO: provide a generic interface to create dialect-specific
556 // Alloc and CopyOp nodes.
557 auto alloc = builder.create<AllocOp>(terminator->getLoc(), memRefType,
558 dynamicOperands);
559
560 // Create a new copy operation that copies to contents of the old
561 // allocation to the new one.
562 builder.create<linalg::CopyOp>(terminator->getLoc(), sourceValue, alloc);
563
564 // Remember the copy of original source value.
565 copiedValues.insert(alloc);
566 return alloc;
567 }
568
569 /// Finds correct dealloc positions according to the algorithm described at
570 /// the top of the file for all alloc nodes and block arguments that can be
571 /// handled by this analysis.
placeDeallocs() const572 void placeDeallocs() const {
573 // Move or insert deallocs using the previously computed information.
574 // These deallocations will be linked to their associated allocation nodes
575 // since they don't have any aliases that can (potentially) increase their
576 // liveness.
577 for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
578 Value alloc = std::get<0>(entry);
579 auto aliasesSet = aliases.resolve(alloc);
580 assert(aliasesSet.size() > 0 && "must contain at least one alias");
581
582 // Determine the actual block to place the dealloc and get liveness
583 // information.
584 Block *placementBlock =
585 findCommonDominator(alloc, aliasesSet, postDominators);
586 const LivenessBlockInfo *livenessInfo =
587 liveness.getLiveness(placementBlock);
588
589 // We have to ensure that the dealloc will be after the last use of all
590 // aliases of the given value. We first assume that there are no uses in
591 // the placementBlock and that we can safely place the dealloc at the
592 // beginning.
593 Operation *endOperation = &placementBlock->front();
594
595 // Iterate over all aliases and ensure that the endOperation will point
596 // to the last operation of all potential aliases in the placementBlock.
597 for (Value alias : aliasesSet) {
598 // Ensure that the start operation is at least the defining operation of
599 // the current alias to avoid invalid placement of deallocs for aliases
600 // without any uses.
601 Operation *beforeOp = endOperation;
602 if (alias.getDefiningOp() &&
603 !(beforeOp = placementBlock->findAncestorOpInBlock(
604 *alias.getDefiningOp())))
605 continue;
606
607 Operation *aliasEndOperation =
608 livenessInfo->getEndOperation(alias, beforeOp);
609 // Check whether the aliasEndOperation lies in the desired block and
610 // whether it is behind the current endOperation. If yes, this will be
611 // the new endOperation.
612 if (aliasEndOperation->getBlock() == placementBlock &&
613 endOperation->isBeforeInBlock(aliasEndOperation))
614 endOperation = aliasEndOperation;
615 }
616 // endOperation is the last operation behind which we can safely store
617 // the dealloc taking all potential aliases into account.
618
619 // If there is an existing dealloc, move it to the right place.
620 Operation *deallocOperation = std::get<1>(entry);
621 if (deallocOperation) {
622 deallocOperation->moveAfter(endOperation);
623 } else {
624 // If the Dealloc position is at the terminator operation of the
625 // block, then the value should escape from a deallocation.
626 Operation *nextOp = endOperation->getNextNode();
627 if (!nextOp)
628 continue;
629 // If there is no dealloc node, insert one in the right place.
630 OpBuilder builder(nextOp);
631 builder.create<DeallocOp>(alloc.getLoc(), alloc);
632 }
633 }
634 }
635
636 /// The dominator info to find the appropriate start operation to move the
637 /// allocs.
638 DominanceInfo dominators;
639
640 /// The post dominator info to move the dependent allocs in the right
641 /// position.
642 PostDominanceInfo postDominators;
643
644 /// Stores already copied allocations to avoid additional copies of copies.
645 ValueSetT copiedValues;
646 };
647
648 //===----------------------------------------------------------------------===//
649 // BufferDeallocationPass
650 //===----------------------------------------------------------------------===//
651
652 /// The actual buffer deallocation pass that inserts and moves dealloc nodes
653 /// into the right positions. Furthermore, it inserts additional allocs and
654 /// copies if necessary. It uses the algorithm described at the top of the file.
655 struct BufferDeallocationPass : BufferDeallocationBase<BufferDeallocationPass> {
656
runOnFunction__anon715d86a90611::BufferDeallocationPass657 void runOnFunction() override {
658 // Ensure that there are supported loops only.
659 Backedges backedges(getFunction());
660 if (backedges.size()) {
661 getFunction().emitError(
662 "Structured control-flow loops are supported only.");
663 return;
664 }
665
666 // Place all required temporary alloc, copy and dealloc nodes.
667 BufferDeallocation deallocation(getFunction());
668 deallocation.deallocate();
669 }
670 };
671
672 } // end anonymous namespace
673
674 //===----------------------------------------------------------------------===//
675 // BufferDeallocationPass construction
676 //===----------------------------------------------------------------------===//
677
createBufferDeallocationPass()678 std::unique_ptr<Pass> mlir::createBufferDeallocationPass() {
679 return std::make_unique<BufferDeallocationPass>();
680 }
681