//===- BufferDeallocation.cpp - the impl for buffer deallocation ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements logic for computing correct alloc and dealloc positions.
// Furthermore, buffer deallocation also adds required new alloc and copy
// operations to ensure that all buffers are deallocated. The main class is the
// BufferDeallocationPass class that implements the underlying algorithm. In
// order to put allocations and deallocations at safe positions, it is
// important to place them in the correct blocks. However, the liveness
// analysis does not pay attention to aliases, which can occur due to branches
// (and their associated block arguments). For this purpose,
// BufferDeallocation first finds all possible aliases of a single value
// (using the BufferAliasAnalysis class). Consider the following example:
//
// ^bb0(%arg0):
//   cond_br %cond, ^bb1, ^bb2
// ^bb1:
//   br ^exit(%arg0)
// ^bb2:
//   %new_value = ...
//   br ^exit(%new_value)
// ^exit(%arg1):
//   return %arg1;
//
// Using the BufferAliasAnalysis class, we will find out that %new_value has a
// potential alias %arg1. We should place the dealloc for %new_value in exit.
// However, we cannot simply free the buffer in this post dominator, since
// %arg1 also aliases %arg0 on the path through bb1. Instead, %new_value has
// to be freed in its defining block, which in turn requires a new copy buffer
// for %arg1 that contains the actual contents. In order to find the dealloc
// position we have to find all potential aliases, iterate over their uses and
// find the common post-dominator block (note that additional copies and
// buffers remove potential aliases and will influence the placement of the
// deallocs). In all cases, the computed block can be safely used to free the
// %new_value buffer (it may be exit or bb2) as the buffer dies there, and we
// can use liveness information to determine the exact operation after which
// the dealloc has to be inserted. Overall, the algorithm introduces copy
// buffers and places deallocs in safe locations to ensure that all buffers
// will be freed in the end.
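//
// A possible (simplified) result of the transformation for the example above
// is sketched below. The exact alloc, copy and dealloc operations depend on
// the dialects involved; the value names are purely illustrative, and, unlike
// the example above, %arg1 is assumed not to be returned from the function
// (see the TODO below):
//
// ^bb0(%arg0):
//   cond_br %cond, ^bb1, ^bb2
// ^bb1:
//   %copy0 = alloc(...)
//   copy(%arg0, %copy0)
//   br ^exit(%copy0)
// ^bb2:
//   %new_value = ...
//   %copy1 = alloc(...)
//   copy(%new_value, %copy1)
//   dealloc %new_value    // %new_value dies in its defining block
//   br ^exit(%copy1)
// ^exit(%arg1):
//   ...                   // last use of %arg1
//   dealloc %arg1         // the copy is freed after its last use
//   return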
//
// TODO:
// The current implementation does not support explicit-control-flow loops and
// the resulting code will be invalid with respect to program semantics.
// However, structured control-flow loops are fully supported. Furthermore, it
// does not yet support functions that return buffers.
//
//===----------------------------------------------------------------------===//

#include "PassDetail.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Operation.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Interfaces/LoopLikeInterface.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/Bufferize.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/ADT/SetOperations.h"

using namespace mlir;

/// Walks over all immediate return-like terminators in the given region.
template <typename FuncT>
static void walkReturnOperations(Region *region, const FuncT &func) {
  for (Block &block : *region)
    for (Operation &operation : block) {
      // Skip non-return-like terminators.
      if (operation.hasTrait<OpTrait::ReturnLike>())
        func(&operation);
    }
}

/// Wrapper for the actual `RegionBranchOpInterface.getSuccessorRegions`
/// function that initializes the required `operandAttributes` array.
static void getSuccessorRegions(RegionBranchOpInterface regionInterface,
                                llvm::Optional<unsigned> index,
                                SmallVectorImpl<RegionSuccessor> &successors) {
  // Create a list of null attributes for each operand to comply with the
  // `getSuccessorRegions` interface definition that requires a single
  // attribute per operand.
  SmallVector<Attribute, 2> operandAttributes(
      regionInterface.getOperation()->getNumOperands());

  // Get all successor regions using the temporarily allocated
  // `operandAttributes`.
  regionInterface.getSuccessorRegions(index, operandAttributes, successors);
}

//===----------------------------------------------------------------------===//
// BufferPlacementAllocs
//===----------------------------------------------------------------------===//

/// Get the start operation to place the given alloc value within the
/// specified placement block.
Operation *BufferPlacementAllocs::getStartOperation(Value allocValue,
                                                    Block *placementBlock,
                                                    const Liveness &liveness) {
  // We have to ensure that we place the alloc before its first use in this
  // block.
  const LivenessBlockInfo &livenessInfo = *liveness.getLiveness(placementBlock);
  Operation *startOperation = livenessInfo.getStartOperation(allocValue);
  // Check whether the start operation lies in the desired placement block.
  // If not, we will use the terminator as this is the last operation in
  // this block.
  if (startOperation->getBlock() != placementBlock) {
    Operation *opInPlacementBlock =
        placementBlock->findAncestorOpInBlock(*startOperation);
    startOperation = opInPlacementBlock ? opInPlacementBlock
                                        : placementBlock->getTerminator();
  }

  return startOperation;
}

/// Finds associated deallocs that can be linked to our allocation nodes (if
/// any).
Operation *BufferPlacementAllocs::findDealloc(Value allocValue) {
  auto userIt = llvm::find_if(allocValue.getUsers(), [&](Operation *user) {
    auto effectInterface = dyn_cast<MemoryEffectOpInterface>(user);
    if (!effectInterface)
      return false;
    // Try to find a free effect that is applied to one of our values
    // that will be automatically freed by our pass.
    SmallVector<MemoryEffects::EffectInstance, 2> effects;
    effectInterface.getEffectsOnValue(allocValue, effects);
    return llvm::any_of(effects, [&](MemoryEffects::EffectInstance &it) {
      return isa<MemoryEffects::Free>(it.getEffect());
    });
  });
  // Assign the associated dealloc operation (if any).
  return userIt != allocValue.user_end() ? *userIt : nullptr;
}

/// Initializes the internal list by discovering all supported allocation
/// nodes.
BufferPlacementAllocs::BufferPlacementAllocs(Operation *op) { build(op); }

/// Searches for and registers all supported allocation entries.
void BufferPlacementAllocs::build(Operation *op) {
  op->walk([&](MemoryEffectOpInterface opInterface) {
    // Try to find a single allocation result.
    SmallVector<MemoryEffects::EffectInstance, 2> effects;
    opInterface.getEffects(effects);

    SmallVector<MemoryEffects::EffectInstance, 2> allocateResultEffects;
    llvm::copy_if(
        effects, std::back_inserter(allocateResultEffects),
        [=](MemoryEffects::EffectInstance &it) {
          Value value = it.getValue();
          return isa<MemoryEffects::Allocate>(it.getEffect()) && value &&
                 value.isa<OpResult>() &&
                 it.getResource() !=
                     SideEffects::AutomaticAllocationScopeResource::get();
        });
    // If there is only one result, we will be able to move the allocation and
    // (possibly existing) deallocation ops.
    if (allocateResultEffects.size() != 1)
      return;
    // Get the allocation result.
    Value allocValue = allocateResultEffects[0].getValue();
    // Find the associated dealloc value and register the allocation entry.
    allocs.push_back(std::make_tuple(allocValue, findDealloc(allocValue)));
  });
}

//===----------------------------------------------------------------------===//
// BufferPlacementTransformationBase
//===----------------------------------------------------------------------===//

/// Constructs a new transformation base using the given root operation.
BufferPlacementTransformationBase::BufferPlacementTransformationBase(
    Operation *op)
    : aliases(op), allocs(op), liveness(op) {}

/// Returns true if the given operation represents a loop by testing whether it
/// implements the `LoopLikeOpInterface` or the `RegionBranchOpInterface`. In
/// the case of a `RegionBranchOpInterface`, it checks all region-based control-
/// flow edges for cycles.
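/// For example, an op implementing the `LoopLikeOpInterface` (such as
/// `scf.for`) is reported as a loop directly, whereas an op that only
/// implements the `RegionBranchOpInterface` is reported as a loop if its
/// successor-region graph contains a cycle (e.g. a while-style op whose body
/// region can branch back to its condition region).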
bool BufferPlacementTransformationBase::isLoop(Operation *op) {
  // If the operation implements the `LoopLikeOpInterface` it can be considered
  // a loop.
  if (isa<LoopLikeOpInterface>(op))
    return true;

  // If the operation does not implement the `RegionBranchOpInterface`, it is
  // (currently) not possible to detect a loop.
  RegionBranchOpInterface regionInterface;
  if (!(regionInterface = dyn_cast<RegionBranchOpInterface>(op)))
    return false;

  // Recurse into a region using the current region interface to find
  // potential cycles.
  SmallPtrSet<Region *, 4> visitedRegions;
  std::function<bool(Region *)> recurse = [&](Region *current) {
    if (!current)
      return false;
    // If we have found a back edge, the parent operation induces a loop.
    if (!visitedRegions.insert(current).second)
      return true;
    // Recurse into all region successors.
    SmallVector<RegionSuccessor, 2> successors;
    getSuccessorRegions(regionInterface, current->getRegionNumber(),
                        successors);
    for (RegionSuccessor &regionEntry : successors)
      if (recurse(regionEntry.getSuccessor()))
        return true;
    return false;
  };

  // Start with all entry regions and test whether they induce a loop.
  SmallVector<RegionSuccessor, 2> successorRegions;
  getSuccessorRegions(regionInterface, /*index=*/llvm::None, successorRegions);
  for (RegionSuccessor &regionEntry : successorRegions) {
    if (recurse(regionEntry.getSuccessor()))
      return true;
    visitedRegions.clear();
  }

  return false;
}

namespace {

//===----------------------------------------------------------------------===//
// Backedges analysis
//===----------------------------------------------------------------------===//

/// A straightforward program analysis which detects loop backedges induced by
/// explicit control flow.
class Backedges {
public:
  using BlockSetT = SmallPtrSet<Block *, 16>;
  using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;

public:
  /// Constructs a new backedges analysis using the op provided.
  Backedges(Operation *op) { recurse(op, op->getBlock()); }

  /// Returns the number of backedges formed by explicit control flow.
  size_t size() const { return edgeSet.size(); }

  /// Returns the start iterator to loop over all backedges.
  BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }

  /// Returns the end iterator to loop over all backedges.
  BackedgeSetT::const_iterator end() const { return edgeSet.end(); }

private:
  /// Enters the current block and inserts a backedge into the `edgeSet` if we
  /// have already visited the current block. The inserted edge links the given
  /// `predecessor` with the `current` block.
  bool enter(Block &current, Block *predecessor) {
    bool inserted = visited.insert(&current).second;
    if (!inserted)
      edgeSet.insert(std::make_pair(predecessor, &current));
    return inserted;
  }

  /// Leaves the current block.
  void exit(Block &current) { visited.erase(&current); }

  /// Recurses into the given operation while taking all attached regions into
  /// account.
  void recurse(Operation *op, Block *predecessor) {
    Block *current = op->getBlock();
    // If the current op implements the `BranchOpInterface`, there can be
    // cycles in the scope of all successor blocks.
    if (isa<BranchOpInterface>(op)) {
      for (Block *succ : current->getSuccessors())
        recurse(*succ, current);
    }
    // Recurse into all distinct regions and check for explicit control-flow
    // loops.
    for (Region &region : op->getRegions())
      recurse(region.front(), current);
  }

  /// Recurses into explicit control-flow structures that are given by
  /// the successor relation defined on the block level.
  void recurse(Block &block, Block *predecessor) {
    // Try to enter the current block. If this is not possible, we are
    // currently processing this block and can safely return here.
    if (!enter(block, predecessor))
      return;

    // Recurse into all operations and successor blocks.
    for (Operation &op : block.getOperations())
      recurse(&op, predecessor);

    // Leave the current block.
    exit(block);
  }

  /// Stores all blocks that are currently visited and on the processing stack.
  BlockSetT visited;

  /// Stores all backedges in the format (source, target).
  BackedgeSetT edgeSet;
};

//===----------------------------------------------------------------------===//
// BufferDeallocation
//===----------------------------------------------------------------------===//

/// The buffer deallocation transformation which ensures that all allocs in the
/// program have a corresponding de-allocation. As a side effect, it might also
/// introduce copies that in turn lead to additional allocs and de-allocations.
class BufferDeallocation : BufferPlacementTransformationBase {
public:
  BufferDeallocation(Operation *op)
      : BufferPlacementTransformationBase(op), dominators(op),
        postDominators(op) {}

  /// Performs the actual placement/creation of all temporary alloc, copy and
  /// dealloc nodes.
  void deallocate() {
    // Add additional allocations and copies that are required.
    introduceCopies();
    // Place deallocations for all allocation entries.
    placeDeallocs();
  }

private:
  /// Introduces required allocs and copy operations to avoid memory leaks.
  void introduceCopies() {
    // Initialize the set of values that require a dedicated memory free
    // operation since their operands cannot be safely deallocated in a post
    // dominator.
    SmallPtrSet<Value, 8> valuesToFree;
    llvm::SmallDenseSet<std::tuple<Value, Block *>> visitedValues;
    SmallVector<std::tuple<Value, Block *>, 8> toProcess;

    // Check dominance relation for proper dominance properties. If the given
    // value node does not dominate an alias, we will have to create a copy in
    // order to free all buffers that can potentially leak into a post
    // dominator.
    auto findUnsafeValues = [&](Value source, Block *definingBlock) {
      auto it = aliases.find(source);
      if (it == aliases.end())
        return;
      for (Value value : it->second) {
        if (valuesToFree.count(value) > 0)
          continue;
        Block *parentBlock = value.getParentBlock();
        // Check whether we have to free this particular block argument or
        // generic value. We have to free the current alias if it is either
        // defined in a non-dominated block or it is defined in the same block
        // but the current value is not dominated by the source value.
        if (!dominators.dominates(definingBlock, parentBlock) ||
            (definingBlock == parentBlock && value.isa<BlockArgument>())) {
          toProcess.emplace_back(value, parentBlock);
          valuesToFree.insert(value);
        } else if (visitedValues.insert(std::make_tuple(value, definingBlock))
                       .second)
          toProcess.emplace_back(value, definingBlock);
      }
    };

    // Detect possibly unsafe aliases starting from all allocations.
    for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value allocValue = std::get<0>(entry);
      findUnsafeValues(allocValue, allocValue.getDefiningOp()->getBlock());
    }
    // Try to find block arguments that require an explicit free operation
    // until we reach a fix point.
    while (!toProcess.empty()) {
      auto current = toProcess.pop_back_val();
      findUnsafeValues(std::get<0>(current), std::get<1>(current));
    }

    // Update buffer aliases to ensure that we free all buffers and block
    // arguments at the correct locations.
    aliases.remove(valuesToFree);

    // Add new allocs and additional copy operations.
    for (Value value : valuesToFree) {
      if (auto blockArg = value.dyn_cast<BlockArgument>())
        introduceBlockArgCopy(blockArg);
      else
        introduceValueCopyForRegionResult(value);

      // Register the value to require a final dealloc. Note that we do not
      // have to assign a block here since we do not want to move the
      // allocation node to another location.
      allocs.registerAlloc(std::make_tuple(value, nullptr));
    }
  }

  /// Introduces temporary allocs in all predecessors and copies the source
  /// values into the newly allocated buffers.
  void introduceBlockArgCopy(BlockArgument blockArg) {
    // Allocate a buffer for the current block argument in the block of
    // the associated value (which will be a predecessor block by
    // definition).
    Block *block = blockArg.getOwner();
    for (auto it = block->pred_begin(), e = block->pred_end(); it != e; ++it) {
      // Get the terminator and the value that will be passed to our
      // argument.
      Operation *terminator = (*it)->getTerminator();
      auto branchInterface = cast<BranchOpInterface>(terminator);
      // Query the associated source value.
      Value sourceValue =
          branchInterface.getSuccessorOperands(it.getSuccessorIndex())
              .getValue()[blockArg.getArgNumber()];
      // Create a new alloc and copy at the current location of the terminator.
      Value alloc = introduceBufferCopy(sourceValue, terminator);
      // Wire new alloc and successor operand.
      auto mutableOperands =
          branchInterface.getMutableSuccessorOperands(it.getSuccessorIndex());
      if (!mutableOperands.hasValue())
        terminator->emitError() << "terminators with immutable successor "
                                   "operands are not supported";
      else
        mutableOperands.getValue()
            .slice(blockArg.getArgNumber(), 1)
            .assign(alloc);
    }

    // Check whether the block argument has implicitly defined predecessors via
    // the RegionBranchOpInterface. This can be the case if the current block
    // argument belongs to the first block in a region and the parent operation
    // implements the RegionBranchOpInterface.
    Region *argRegion = block->getParent();
    Operation *parentOp = argRegion->getParentOp();
    RegionBranchOpInterface regionInterface;
    if (!argRegion || &argRegion->front() != block ||
        !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
      return;

    introduceCopiesForRegionSuccessors(
        regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
        [&](RegionSuccessor &successorRegion) {
          // Find a predecessor of our argRegion.
          return successorRegion.getSuccessor() == argRegion;
        });

    // Check whether the block argument belongs to an entry region of the
    // parent operation. In this case, we have to introduce an additional copy
    // for the buffer that is passed to the argument.
    SmallVector<RegionSuccessor, 2> successorRegions;
    getSuccessorRegions(regionInterface, /*index=*/llvm::None,
                        successorRegions);
    auto *it =
        llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) {
          return successorRegion.getSuccessor() == argRegion;
        });
    if (it == successorRegions.end())
      return;

    // Determine the actual operand to introduce a copy for and rewire the
    // operand to point to the copy instead.
    Value operand =
        regionInterface.getSuccessorEntryOperands(argRegion->getRegionNumber())
            [llvm::find(it->getSuccessorInputs(), blockArg).getIndex()];
    Value copy = introduceBufferCopy(operand, parentOp);

    auto op = llvm::find(parentOp->getOperands(), operand);
    assert(op != parentOp->getOperands().end() &&
           "parentOp does not contain operand");
    parentOp->setOperand(op.getIndex(), copy);
  }

  /// Introduces temporary allocs in front of all associated nested-region
  /// terminators and copies the source values into the newly allocated
  /// buffers.
  void introduceValueCopyForRegionResult(Value value) {
    // Get the actual result index in the scope of the parent terminator.
    Operation *operation = value.getDefiningOp();
    auto regionInterface = cast<RegionBranchOpInterface>(operation);
    // Filter successors that return to the parent operation.
    auto regionPredicate = [&](RegionSuccessor &successorRegion) {
      // If the RegionSuccessor has no associated successor, it will return to
      // its parent operation.
      return !successorRegion.getSuccessor();
    };
    // Introduce a copy for all region "results" that are returned to the
    // parent operation. This is required since the parent's result value has
    // been considered critical. Therefore, the algorithm assumes that a copy
    // of a previously allocated buffer is returned by the operation (like in
    // the case of a block argument).
    introduceCopiesForRegionSuccessors(regionInterface, operation->getRegions(),
                                       value, regionPredicate);
  }

  /// Introduces buffer copies for all terminators in the given regions. The
  /// regionPredicate is applied to every successor region in order to restrict
  /// the copies to specific regions.
  template <typename TPredicate>
  void introduceCopiesForRegionSuccessors(
      RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
      Value argValue, const TPredicate &regionPredicate) {
    for (Region &region : regions) {
      // Query the regionInterface to get all successor regions of the current
      // one.
      SmallVector<RegionSuccessor, 2> successorRegions;
      getSuccessorRegions(regionInterface, region.getRegionNumber(),
                          successorRegions);
      // Try to find a matching region successor.
      RegionSuccessor *regionSuccessor =
          llvm::find_if(successorRegions, regionPredicate);
      if (regionSuccessor == successorRegions.end())
        continue;
      // Get the operand index in the context of the current successor input
      // bindings.
      size_t operandIndex =
          llvm::find(regionSuccessor->getSuccessorInputs(), argValue)
              .getIndex();

      // Iterate over all immediate terminator operations to introduce
      // new buffer allocations. Thereby, the appropriate terminator operand
      // will be adjusted to point to the newly allocated buffer instead.
      walkReturnOperations(&region, [&](Operation *terminator) {
        // Extract the source value from the current terminator.
        Value sourceValue = terminator->getOperand(operandIndex);
        // Create a new alloc at the current location of the terminator.
        Value alloc = introduceBufferCopy(sourceValue, terminator);
        // Wire alloc and terminator operand.
        terminator->setOperand(operandIndex, alloc);
      });
    }
  }

  /// Creates a new memory allocation for the given source value and copies
  /// its content into the newly allocated buffer. The terminator operation is
  /// used to insert the alloc and copy operations at the right places.
  Value introduceBufferCopy(Value sourceValue, Operation *terminator) {
    // Avoid multiple copies of the same source value. This can happen in the
    // presence of loops when a branch acts as a backedge while also having
    // another successor that returns to its parent operation. Note that
    // copying copied buffers can introduce memory leaks since the invariant of
    // BufferPlacement assumes that a buffer will be only copied once into a
    // temporary buffer. Hence, the construction of copy chains introduces
    // additional allocations that are not tracked automatically by the
    // algorithm.
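    // For example, if %copy was created by this function as a temporary copy
    // of some value %a, a later request to copy %copy again simply returns
    // %copy instead of allocating yet another buffer (the names are purely
    // illustrative).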
    if (copiedValues.contains(sourceValue))
      return sourceValue;
    // Create a new alloc at the current location of the terminator.
    auto memRefType = sourceValue.getType().cast<MemRefType>();
    OpBuilder builder(terminator);

    // Extract the dynamic dimensions of the source memref so that the new
    // alloc matches its shape.
    SmallVector<Value, 4> dynamicOperands;
    for (auto shapeElement : llvm::enumerate(memRefType.getShape())) {
      if (!ShapedType::isDynamic(shapeElement.value()))
        continue;
      dynamicOperands.push_back(builder.create<DimOp>(
          terminator->getLoc(), sourceValue, shapeElement.index()));
    }

    // TODO: provide a generic interface to create dialect-specific
    // Alloc and CopyOp nodes.
    auto alloc = builder.create<AllocOp>(terminator->getLoc(), memRefType,
                                         dynamicOperands);

    // Create a new copy operation that copies the contents of the old
    // allocation to the new one.
    builder.create<linalg::CopyOp>(terminator->getLoc(), sourceValue, alloc);

    // Remember the copy of the original source value.
    copiedValues.insert(alloc);
    return alloc;
  }

  /// Finds correct dealloc positions according to the algorithm described at
  /// the top of the file for all alloc nodes and block arguments that can be
  /// handled by this analysis.
  void placeDeallocs() const {
    // Move or insert deallocs using the previously computed information.
    // These deallocations will be linked to their associated allocation nodes
    // since they don't have any aliases that can (potentially) increase their
    // liveness.
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value alloc = std::get<0>(entry);
      auto aliasesSet = aliases.resolve(alloc);
      assert(aliasesSet.size() > 0 && "must contain at least one alias");

      // Determine the actual block to place the dealloc and get liveness
      // information.
      Block *placementBlock =
          findCommonDominator(alloc, aliasesSet, postDominators);
      const LivenessBlockInfo *livenessInfo =
          liveness.getLiveness(placementBlock);

      // We have to ensure that the dealloc will be after the last use of all
      // aliases of the given value. We first assume that there are no uses in
      // the placementBlock and that we can safely place the dealloc at the
      // beginning.
      Operation *endOperation = &placementBlock->front();

      // Iterate over all aliases and ensure that the endOperation will point
      // to the last operation of all potential aliases in the placementBlock.
      for (Value alias : aliasesSet) {
        // Ensure that the start operation is at least the defining operation
        // of the current alias to avoid invalid placement of deallocs for
        // aliases without any uses.
        Operation *beforeOp = endOperation;
        if (alias.getDefiningOp() &&
            !(beforeOp = placementBlock->findAncestorOpInBlock(
                  *alias.getDefiningOp())))
          continue;

        Operation *aliasEndOperation =
            livenessInfo->getEndOperation(alias, beforeOp);
        // Check whether the aliasEndOperation lies in the desired block and
        // whether it is behind the current endOperation. If yes, this will be
        // the new endOperation.
        if (aliasEndOperation->getBlock() == placementBlock &&
            endOperation->isBeforeInBlock(aliasEndOperation))
          endOperation = aliasEndOperation;
      }
      // endOperation is the last operation behind which we can safely insert
      // the dealloc taking all potential aliases into account.

      // If there is an existing dealloc, move it to the right place.
      Operation *deallocOperation = std::get<1>(entry);
      if (deallocOperation) {
        deallocOperation->moveAfter(endOperation);
      } else {
        // If the last use (endOperation) is the block terminator, the value
        // escapes this block (e.g. it is returned or passed on) and must not
        // be deallocated here.
        Operation *nextOp = endOperation->getNextNode();
        if (!nextOp)
          continue;
        // If there is no dealloc node, insert one in the right place.
        OpBuilder builder(nextOp);
        builder.create<DeallocOp>(alloc.getLoc(), alloc);
      }
    }
  }

  /// The dominator info used to check whether allocation values properly
  /// dominate their aliases.
  DominanceInfo dominators;

  /// The post-dominator info used to determine the blocks in which the
  /// deallocs have to be placed.
  PostDominanceInfo postDominators;

  /// Stores already copied allocations to avoid additional copies of copies.
  ValueSetT copiedValues;
};

//===----------------------------------------------------------------------===//
// BufferDeallocationPass
//===----------------------------------------------------------------------===//

/// The actual buffer deallocation pass that inserts and moves dealloc nodes
/// into the right positions. Furthermore, it inserts additional allocs and
/// copies if necessary. It uses the algorithm described at the top of the
/// file.
struct BufferDeallocationPass : BufferDeallocationBase<BufferDeallocationPass> {

  void runOnFunction() override {
    // Ensure that the function contains only supported loops.
    Backedges backedges(getFunction());
    if (backedges.size()) {
      getFunction().emitError(
          "only structured control-flow loops are supported");
      return;
    }

    // Place all required temporary alloc, copy and dealloc nodes.
    BufferDeallocation deallocation(getFunction());
    deallocation.deallocate();
  }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// BufferDeallocationPass construction
//===----------------------------------------------------------------------===//

std::unique_ptr<Pass> mlir::createBufferDeallocationPass() {
  return std::make_unique<BufferDeallocationPass>();
}