//===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines ObjC ARC optimizations. ARC stands for Automatic
/// Reference Counting and is a system for managing reference counts for objects
/// in Objective C.
///
/// The optimizations performed include elimination of redundant, partially
/// redundant, and inconsequential reference count operations, elimination of
/// redundant weak pointer operations, and numerous minor simplifications.
///
/// WARNING: This file knows about certain library functions. It recognizes them
/// by name, and hardwires knowledge of their semantics.
///
/// WARNING: This file knows about how certain Objective-C library functions are
/// used. Naive LLVM IR transformations which would otherwise be
/// behavior-preserving may break these assumptions.
//
//===----------------------------------------------------------------------===//

#include "ARCRuntimeEntryPoints.h"
#include "BlotMapVector.h"
#include "DependencyAnalysis.h"
#include "ObjCARC.h"
#include "ProvenanceAnalysis.h"
#include "PtrState.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/Analysis/ObjCARCInstKind.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/ObjCARC.h"
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;
using namespace llvm::objcarc;

#define DEBUG_TYPE "objc-arc-opts"

static cl::opt<unsigned> MaxPtrStates("arc-opt-max-ptr-states",
    cl::Hidden,
    cl::desc("Maximum number of ptr states the optimizer keeps track of"),
    cl::init(4095));

/// \defgroup ARCUtilities Utility declarations/definitions specific to ARC.
/// @{

/// This is similar to GetRCIdentityRoot but it stops as soon
/// as it finds a value with multiple uses.
static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
  // ConstantData (like ConstantPointerNull and UndefValue) is used across
  // modules.  It's never a single-use value.
  if (isa<ConstantData>(Arg))
    return nullptr;

  if (Arg->hasOneUse()) {
    if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg))
      return FindSingleUseIdentifiedObject(BC->getOperand(0));
    if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
      if (GEP->hasAllZeroIndices())
        return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
    if (IsForwarding(GetBasicARCInstKind(Arg)))
      return FindSingleUseIdentifiedObject(
               cast<CallInst>(Arg)->getArgOperand(0));
    if (!IsObjCIdentifiedObject(Arg))
      return nullptr;
    return Arg;
  }

  // If we found an identifiable object that has multiple uses, but they are
  // all trivial (each has no uses of its own and has Arg as its RC identity
  // root), we can still consider this to be a single-use value.
  if (IsObjCIdentifiedObject(Arg)) {
    for (const User *U : Arg->users())
      if (!U->use_empty() || GetRCIdentityRoot(U) != Arg)
        return nullptr;

    return Arg;
  }

  return nullptr;
}
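
// Hypothetical example (not from the original source): given IR such as
//
//   %obj = call i8* @foo()                    ; call results count as
//                                             ; ObjC-identified objects
//   %gep = getelementptr i8, i8* %obj, i32 0  ; single use, all-zero indices
//   call void @llvm.objc.release(i8* %gep)
//
// FindSingleUseIdentifiedObject(%gep) looks through the single-use all-zero
// GEP and returns %obj.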

/// @}
///
/// \defgroup ARCOpt ARC Optimization.
/// @{

// TODO: On code like this:
//
// objc_retain(%x)
// stuff_that_cannot_release()
// objc_autorelease(%x)
// stuff_that_cannot_release()
// objc_retain(%x)
// stuff_that_cannot_release()
// objc_autorelease(%x)
//
// The second retain and autorelease can be deleted.

// TODO: It should be possible to delete
// objc_autoreleasePoolPush and objc_autoreleasePoolPop
// pairs if nothing is actually autoreleased between them. Also, autorelease
// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
// after inlining) can be turned into plain release calls.

// TODO: Critical-edge splitting. If the optimal insertion point is
// a critical edge, the current algorithm has to fail, because it doesn't
// know how to split edges. It should be possible to make the optimizer
// think in terms of edges, rather than blocks, and then split critical
// edges on demand.

// TODO: OptimizeSequences could be generalized to be interprocedural.

// TODO: Recognize that a bunch of other objc runtime calls have
// non-escaping arguments and non-releasing arguments, and may be
// non-autoreleasing.

// TODO: Sink autorelease calls as far as possible. Unfortunately we
// usually can't sink them past other calls, which would be the main
// case where it would be useful.

// TODO: The pointer returned from objc_loadWeakRetained is retained.

// TODO: Delete release+retain pairs (rare).

STATISTIC(NumNoops,       "Number of no-op objc calls eliminated");
STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
STATISTIC(NumAutoreleases, "Number of autoreleases converted to releases");
STATISTIC(NumRets,        "Number of return value forwarding "
                          "retain+autoreleases eliminated");
STATISTIC(NumRRs,         "Number of retain+release paths eliminated");
STATISTIC(NumPeeps,       "Number of calls peephole-optimized");
#ifndef NDEBUG
STATISTIC(NumRetainsBeforeOpt,
          "Number of retains before optimization");
STATISTIC(NumReleasesBeforeOpt,
          "Number of releases before optimization");
STATISTIC(NumRetainsAfterOpt,
          "Number of retains after optimization");
STATISTIC(NumReleasesAfterOpt,
          "Number of releases after optimization");
#endif

namespace {

  /// Per-BasicBlock state.
  class BBState {
    /// The number of unique control paths from the entry which can reach this
    /// block.
    unsigned TopDownPathCount = 0;

    /// The number of unique control paths to exits from this block.
    unsigned BottomUpPathCount = 0;

    /// The top-down traversal uses this to record information known about a
    /// pointer at the bottom of each block.
    BlotMapVector<const Value *, TopDownPtrState> PerPtrTopDown;

    /// The bottom-up traversal uses this to record information known about a
    /// pointer at the top of each block.
    BlotMapVector<const Value *, BottomUpPtrState> PerPtrBottomUp;

    /// Effective predecessors of the current block ignoring ignorable edges and
    /// ignored backedges.
    SmallVector<BasicBlock *, 2> Preds;

    /// Effective successors of the current block ignoring ignorable edges and
    /// ignored backedges.
    SmallVector<BasicBlock *, 2> Succs;

  public:
    static const unsigned OverflowOccurredValue;

    BBState() = default;

    using top_down_ptr_iterator = decltype(PerPtrTopDown)::iterator;
    using const_top_down_ptr_iterator = decltype(PerPtrTopDown)::const_iterator;

    top_down_ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
    top_down_ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
    const_top_down_ptr_iterator top_down_ptr_begin() const {
      return PerPtrTopDown.begin();
    }
    const_top_down_ptr_iterator top_down_ptr_end() const {
      return PerPtrTopDown.end();
    }
    bool hasTopDownPtrs() const {
      return !PerPtrTopDown.empty();
    }

    unsigned top_down_ptr_list_size() const {
      return std::distance(top_down_ptr_begin(), top_down_ptr_end());
    }

    using bottom_up_ptr_iterator = decltype(PerPtrBottomUp)::iterator;
    using const_bottom_up_ptr_iterator =
        decltype(PerPtrBottomUp)::const_iterator;

    bottom_up_ptr_iterator bottom_up_ptr_begin() {
      return PerPtrBottomUp.begin();
    }
    bottom_up_ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
    const_bottom_up_ptr_iterator bottom_up_ptr_begin() const {
      return PerPtrBottomUp.begin();
    }
    const_bottom_up_ptr_iterator bottom_up_ptr_end() const {
      return PerPtrBottomUp.end();
    }
    bool hasBottomUpPtrs() const {
      return !PerPtrBottomUp.empty();
    }

    unsigned bottom_up_ptr_list_size() const {
      return std::distance(bottom_up_ptr_begin(), bottom_up_ptr_end());
    }

    /// Mark this block as being an entry block, which has one path from the
    /// entry by definition.
    void SetAsEntry() { TopDownPathCount = 1; }

    /// Mark this block as being an exit block, which has one path to an exit by
    /// definition.
    void SetAsExit()  { BottomUpPathCount = 1; }

    /// Attempt to find the PtrState object describing the top down state for
    /// pointer Arg. Return a newly initialized PtrState describing the top down
    /// state for Arg if we do not find one.
    TopDownPtrState &getPtrTopDownState(const Value *Arg) {
      return PerPtrTopDown[Arg];
    }

    /// Attempt to find the PtrState object describing the bottom up state for
    /// pointer Arg. Return a newly initialized PtrState describing the bottom up
    /// state for Arg if we do not find one.
    BottomUpPtrState &getPtrBottomUpState(const Value *Arg) {
      return PerPtrBottomUp[Arg];
    }

    /// Attempt to find the PtrState object describing the bottom up state for
    /// pointer Arg.
    bottom_up_ptr_iterator findPtrBottomUpState(const Value *Arg) {
      return PerPtrBottomUp.find(Arg);
    }

    void clearBottomUpPointers() {
      PerPtrBottomUp.clear();
    }

    void clearTopDownPointers() {
      PerPtrTopDown.clear();
    }

    void InitFromPred(const BBState &Other);
    void InitFromSucc(const BBState &Other);
    void MergePred(const BBState &Other);
    void MergeSucc(const BBState &Other);

    /// Compute the number of possible unique paths from an entry to an exit
    /// which pass through this block. This is only valid after both the
    /// top-down and bottom-up traversals are complete.
    ///
    /// Returns true if overflow occurred.
    bool GetAllPathCountWithOverflow(unsigned &PathCount) const {
      if (TopDownPathCount == OverflowOccurredValue ||
          BottomUpPathCount == OverflowOccurredValue)
        return true;
      unsigned long long Product =
        (unsigned long long)TopDownPathCount*BottomUpPathCount;
      // Overflow occurred if any of the upper bits of Product are set, or if
      // the lower 32 bits of Product are all set (i.e. equal to
      // OverflowOccurredValue).
      return (Product >> 32) ||
             ((PathCount = Product) == OverflowOccurredValue);
    }
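
    // Worked example (illustrative, not from the original source): with
    // TopDownPathCount == 3 and BottomUpPathCount == 5, Product == 15, no
    // upper bits are set, and PathCount is set to 15 with a return value of
    // false (no overflow).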

    // Specialized CFG utilities.
    using edge_iterator = SmallVectorImpl<BasicBlock *>::const_iterator;

    edge_iterator pred_begin() const { return Preds.begin(); }
    edge_iterator pred_end() const { return Preds.end(); }
    edge_iterator succ_begin() const { return Succs.begin(); }
    edge_iterator succ_end() const { return Succs.end(); }

    void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); }
    void addPred(BasicBlock *Pred) { Preds.push_back(Pred); }

    bool isExit() const { return Succs.empty(); }
  };

} // end anonymous namespace

const unsigned BBState::OverflowOccurredValue = 0xffffffff;

namespace llvm {

raw_ostream &operator<<(raw_ostream &OS,
                        BBState &BBState) LLVM_ATTRIBUTE_UNUSED;

} // end namespace llvm

void BBState::InitFromPred(const BBState &Other) {
  PerPtrTopDown = Other.PerPtrTopDown;
  TopDownPathCount = Other.TopDownPathCount;
}

void BBState::InitFromSucc(const BBState &Other) {
  PerPtrBottomUp = Other.PerPtrBottomUp;
  BottomUpPathCount = Other.BottomUpPathCount;
}

/// The top-down traversal uses this to merge information about predecessors to
/// form the initial state for a new block.
void BBState::MergePred(const BBState &Other) {
  if (TopDownPathCount == OverflowOccurredValue)
    return;

  // Other.TopDownPathCount can be 0, in which case it is either dead or a
  // loop backedge. Loop backedges are special.
  TopDownPathCount += Other.TopDownPathCount;

  // To be consistent, we clear the top-down pointers when the addition makes
  // TopDownPathCount equal to OverflowOccurredValue, even though "true"
  // overflow has not occurred.
  if (TopDownPathCount == OverflowOccurredValue) {
    clearTopDownPointers();
    return;
  }

  // Check for overflow: if the unsigned addition wrapped around, the sum is
  // smaller than the addend. Fall back to conservative behavior.
  if (TopDownPathCount < Other.TopDownPathCount) {
    TopDownPathCount = OverflowOccurredValue;
    clearTopDownPointers();
    return;
  }

  // For each entry in the other set, if our set has an entry with the same key,
  // merge the entries. Otherwise, copy the entry and merge it with an empty
  // entry.
  for (auto MI = Other.top_down_ptr_begin(), ME = Other.top_down_ptr_end();
       MI != ME; ++MI) {
    auto Pair = PerPtrTopDown.insert(*MI);
    Pair.first->second.Merge(Pair.second ? TopDownPtrState() : MI->second,
                             /*TopDown=*/true);
  }

  // For each entry in our set, if the other set doesn't have an entry with the
  // same key, force it to merge with an empty entry.
  for (auto MI = top_down_ptr_begin(), ME = top_down_ptr_end(); MI != ME; ++MI)
    if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
      MI->second.Merge(TopDownPtrState(), /*TopDown=*/true);
}

/// The bottom-up traversal uses this to merge information about successors to
/// form the initial state for a new block.
void BBState::MergeSucc(const BBState &Other) {
  if (BottomUpPathCount == OverflowOccurredValue)
    return;

  // Other.BottomUpPathCount can be 0, in which case it is either dead or a
  // loop backedge. Loop backedges are special.
  BottomUpPathCount += Other.BottomUpPathCount;

  // To be consistent, we clear the bottom-up pointers when the addition makes
  // BottomUpPathCount equal to OverflowOccurredValue, even though "true"
  // overflow has not occurred.
  if (BottomUpPathCount == OverflowOccurredValue) {
    clearBottomUpPointers();
    return;
  }

  // Check for overflow: if the unsigned addition wrapped around, the sum is
  // smaller than the addend. Fall back to conservative behavior.
  if (BottomUpPathCount < Other.BottomUpPathCount) {
    BottomUpPathCount = OverflowOccurredValue;
    clearBottomUpPointers();
    return;
  }

  // For each entry in the other set, if our set has an entry with the
  // same key, merge the entries. Otherwise, copy the entry and merge
  // it with an empty entry.
  for (auto MI = Other.bottom_up_ptr_begin(), ME = Other.bottom_up_ptr_end();
       MI != ME; ++MI) {
    auto Pair = PerPtrBottomUp.insert(*MI);
    Pair.first->second.Merge(Pair.second ? BottomUpPtrState() : MI->second,
                             /*TopDown=*/false);
  }

  // For each entry in our set, if the other set doesn't have an entry
  // with the same key, force it to merge with an empty entry.
  for (auto MI = bottom_up_ptr_begin(), ME = bottom_up_ptr_end(); MI != ME;
       ++MI)
    if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
      MI->second.Merge(BottomUpPtrState(), /*TopDown=*/false);
}
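
// Sketch of how these merges are driven (an assumption based on the visit
// logic later in this file): for a block B with predecessors P1 and P2, the
// top-down pass calls InitFromPred(BBStates[P1]) and then
// MergePred(BBStates[P2]); a pointer known along only one edge is merged
// against an empty state, conservatively weakening its recorded sequence.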

raw_ostream &llvm::operator<<(raw_ostream &OS, BBState &BBInfo) {
  // Dump the pointers we are tracking.
  OS << "    TopDown State:\n";
  if (!BBInfo.hasTopDownPtrs()) {
    OS << "        NONE!\n";
  } else {
    for (auto I = BBInfo.top_down_ptr_begin(), E = BBInfo.top_down_ptr_end();
         I != E; ++I) {
      const PtrState &P = I->second;
      OS << "        Ptr: " << *I->first
         << "\n            KnownSafe:        " << (P.IsKnownSafe()?"true":"false")
         << "\n            ImpreciseRelease: "
           << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n"
         << "            HasCFGHazards:    "
           << (P.IsCFGHazardAfflicted()?"true":"false") << "\n"
         << "            KnownPositive:    "
           << (P.HasKnownPositiveRefCount()?"true":"false") << "\n"
         << "            Seq:              "
         << P.GetSeq() << "\n";
    }
  }

  OS << "    BottomUp State:\n";
  if (!BBInfo.hasBottomUpPtrs()) {
    OS << "        NONE!\n";
  } else {
    for (auto I = BBInfo.bottom_up_ptr_begin(), E = BBInfo.bottom_up_ptr_end();
         I != E; ++I) {
      const PtrState &P = I->second;
      OS << "        Ptr: " << *I->first
         << "\n            KnownSafe:        " << (P.IsKnownSafe()?"true":"false")
         << "\n            ImpreciseRelease: "
           << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n"
         << "            HasCFGHazards:    "
           << (P.IsCFGHazardAfflicted()?"true":"false") << "\n"
         << "            KnownPositive:    "
           << (P.HasKnownPositiveRefCount()?"true":"false") << "\n"
         << "            Seq:              "
         << P.GetSeq() << "\n";
    }
  }

  return OS;
}

namespace {

/// The main ARC optimization pass.
class ObjCARCOpt {
  bool Changed = false;
  bool CFGChanged = false;
  ProvenanceAnalysis PA;

  /// A cache of references to runtime entry point constants.
  ARCRuntimeEntryPoints EP;

  /// A cache of MDKinds that can be passed into other functions to propagate
  /// MDKind identifiers.
  ARCMDKindCache MDKindCache;

  BundledRetainClaimRVs *BundledInsts = nullptr;

  /// A flag indicating whether the optimization that removes or moves
  /// retain/release pairs should be performed.
  bool DisableRetainReleasePairing = false;

  /// Flags which determine whether each of the interesting runtime functions
  /// is in fact used in the current function.
  unsigned UsedInThisFunction;

  DenseMap<BasicBlock *, ColorVector> BlockEHColors;

  bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
  void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
                                 ARCInstKind &Class);
  void OptimizeIndividualCalls(Function &F);

  /// Optimize an individual call, optionally passing the result of
  /// GetArgRCIdentityRoot if it has already been computed.
  void OptimizeIndividualCallImpl(Function &F, Instruction *Inst,
                                  ARCInstKind Class, const Value *Arg);

  /// Try to optimize an AutoreleaseRV with a RetainRV or UnsafeClaimRV.  If the
  /// optimization occurs, returns true to indicate that the caller should
  /// assume the instructions are dead.
  bool OptimizeInlinedAutoreleaseRVCall(Function &F, Instruction *Inst,
                                        const Value *&Arg, ARCInstKind Class,
                                        Instruction *AutoreleaseRV,
                                        const Value *&AutoreleaseRVArg);

  void CheckForCFGHazards(const BasicBlock *BB,
                          DenseMap<const BasicBlock *, BBState> &BBStates,
                          BBState &MyStates) const;
  bool VisitInstructionBottomUp(Instruction *Inst, BasicBlock *BB,
                                BlotMapVector<Value *, RRInfo> &Retains,
                                BBState &MyStates);
  bool VisitBottomUp(BasicBlock *BB,
                     DenseMap<const BasicBlock *, BBState> &BBStates,
                     BlotMapVector<Value *, RRInfo> &Retains);
  bool VisitInstructionTopDown(
      Instruction *Inst, DenseMap<Value *, RRInfo> &Releases, BBState &MyStates,
      const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
          &ReleaseInsertPtToRCIdentityRoots);
  bool VisitTopDown(
      BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates,
      DenseMap<Value *, RRInfo> &Releases,
      const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
          &ReleaseInsertPtToRCIdentityRoots);
  bool Visit(Function &F, DenseMap<const BasicBlock *, BBState> &BBStates,
             BlotMapVector<Value *, RRInfo> &Retains,
             DenseMap<Value *, RRInfo> &Releases);

  void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
                 BlotMapVector<Value *, RRInfo> &Retains,
                 DenseMap<Value *, RRInfo> &Releases,
                 SmallVectorImpl<Instruction *> &DeadInsts, Module *M);

  bool PairUpRetainsAndReleases(DenseMap<const BasicBlock *, BBState> &BBStates,
                                BlotMapVector<Value *, RRInfo> &Retains,
                                DenseMap<Value *, RRInfo> &Releases, Module *M,
                                Instruction *Retain,
                                SmallVectorImpl<Instruction *> &DeadInsts,
                                RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
                                Value *Arg, bool KnownSafe,
                                bool &AnyPairsCompletelyEliminated);

  bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
                            BlotMapVector<Value *, RRInfo> &Retains,
                            DenseMap<Value *, RRInfo> &Releases, Module *M);

  void OptimizeWeakCalls(Function &F);

  bool OptimizeSequences(Function &F);

  void OptimizeReturns(Function &F);

  template <typename PredicateT>
  static void cloneOpBundlesIf(CallBase *CI,
                               SmallVectorImpl<OperandBundleDef> &OpBundles,
                               PredicateT Predicate) {
    for (unsigned I = 0, E = CI->getNumOperandBundles(); I != E; ++I) {
      OperandBundleUse B = CI->getOperandBundleAt(I);
      if (Predicate(B))
        OpBundles.emplace_back(B);
    }
  }

  void addOpBundleForFunclet(BasicBlock *BB,
                             SmallVectorImpl<OperandBundleDef> &OpBundles) {
    if (!BlockEHColors.empty()) {
      const ColorVector &CV = BlockEHColors.find(BB)->second;
      assert(CV.size() > 0 && "Uncolored block");
      for (BasicBlock *EHPadBB : CV)
        if (auto *EHPad = dyn_cast<FuncletPadInst>(EHPadBB->getFirstNonPHI())) {
          OpBundles.emplace_back("funclet", EHPad);
          return;
        }
    }
  }

#ifndef NDEBUG
  void GatherStatistics(Function &F, bool AfterOptimization = false);
#endif

  public:
    void init(Function &F);
    bool run(Function &F, AAResults &AA);
    bool hasCFGChanged() const { return CFGChanged; }
};
} // end anonymous namespace

/// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
/// not a return value.
bool ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
  // Check for the argument being from an immediately preceding call or invoke.
  const Value *Arg = GetArgRCIdentityRoot(RetainRV);
  if (const Instruction *Call = dyn_cast<CallBase>(Arg)) {
    if (Call->getParent() == RetainRV->getParent()) {
      BasicBlock::const_iterator I(Call);
      ++I;
      while (IsNoopInstruction(&*I))
        ++I;
      if (&*I == RetainRV)
        return false;
    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
      BasicBlock *RetainRVParent = RetainRV->getParent();
      if (II->getNormalDest() == RetainRVParent) {
        BasicBlock::const_iterator I = RetainRVParent->begin();
        while (IsNoopInstruction(&*I))
          ++I;
        if (&*I == RetainRV)
          return false;
      }
    }
  }

  assert(!BundledInsts->contains(RetainRV) &&
         "a bundled retainRV's argument should be a call");

  // Turn it into a plain objc_retain.
  Changed = true;
  ++NumPeeps;

  LLVM_DEBUG(dbgs() << "Transforming objc_retainAutoreleasedReturnValue => "
                       "objc_retain since the operand is not a return value.\n"
                       "Old = "
                    << *RetainRV << "\n");

  Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain);
  cast<CallInst>(RetainRV)->setCalledFunction(NewDecl);

  LLVM_DEBUG(dbgs() << "New = " << *RetainRV << "\n");

  return false;
}
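
// Hedged example (not from the source): given
//
//   %x = call i8* @bar()
//   call void @use(i8* %x)   ; intervening instruction, not a no-op
//   %r = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* %x)
//
// the retainRV no longer immediately follows the call producing %x, so it is
// rewritten into the equivalent plain @llvm.objc.retain(i8* %x).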

bool ObjCARCOpt::OptimizeInlinedAutoreleaseRVCall(
    Function &F, Instruction *Inst, const Value *&Arg, ARCInstKind Class,
    Instruction *AutoreleaseRV, const Value *&AutoreleaseRVArg) {
  if (BundledInsts->contains(Inst))
    return false;

  // Must be in the same basic block.
  assert(Inst->getParent() == AutoreleaseRV->getParent());

  // Must operate on the same root.
  Arg = GetArgRCIdentityRoot(Inst);
  AutoreleaseRVArg = GetArgRCIdentityRoot(AutoreleaseRV);
  if (Arg != AutoreleaseRVArg) {
    // If there isn't an exact match, check if we have equivalent PHIs.
    const PHINode *PN = dyn_cast<PHINode>(Arg);
    if (!PN)
      return false;

    SmallVector<const Value *, 4> ArgUsers;
    getEquivalentPHIs(*PN, ArgUsers);
    if (!llvm::is_contained(ArgUsers, AutoreleaseRVArg))
      return false;
  }

  // Okay, this is a match.  Merge them.
  ++NumPeeps;
  LLVM_DEBUG(dbgs() << "Found inlined objc_autoreleaseReturnValue '"
                    << *AutoreleaseRV << "' paired with '" << *Inst << "'\n");

  // Delete the RV pair, starting with the AutoreleaseRV.
  AutoreleaseRV->replaceAllUsesWith(
      cast<CallInst>(AutoreleaseRV)->getArgOperand(0));
  Changed = true;
  EraseInstruction(AutoreleaseRV);
  if (Class == ARCInstKind::RetainRV) {
    // AutoreleaseRV and RetainRV cancel out.  Delete the RetainRV.
    Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0));
    EraseInstruction(Inst);
    return true;
  }

  // UnsafeClaimRV is a frontend peephole for RetainRV + Release.  Since the
  // AutoreleaseRV and RetainRV cancel out, replace UnsafeClaimRV with Release.
  assert(Class == ARCInstKind::UnsafeClaimRV);
  Value *CallArg = cast<CallInst>(Inst)->getArgOperand(0);
  CallInst *Release = CallInst::Create(
      EP.get(ARCRuntimeEntryPointKind::Release), CallArg, "", Inst);
  assert(IsAlwaysTail(ARCInstKind::UnsafeClaimRV) &&
         "Expected UnsafeClaimRV to be safe to tail call");
  Release->setTailCall();
  Inst->replaceAllUsesWith(CallArg);
  EraseInstruction(Inst);

  // Run the normal optimizations on Release.
  OptimizeIndividualCallImpl(F, Release, ARCInstKind::Release, Arg);
  return true;
}
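
// Sketch of the pattern this handles (illustrative only): after inlining a
// callee that ends with
//
//   %v = call i8* @llvm.objc.autoreleaseReturnValue(i8* %obj)
//   ret i8* %v
//
// directly in front of the caller's
//
//   %r = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* %v)
//
// both calls act on the same root and cancel out: both are erased and their
// uses are rewired to %obj.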

/// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not
/// used as a return value.
void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F,
                                           Instruction *AutoreleaseRV,
                                           ARCInstKind &Class) {
  // Check for a return of the pointer value.
  const Value *Ptr = GetArgRCIdentityRoot(AutoreleaseRV);

  // If the argument is ConstantPointerNull or UndefValue, its other users
  // aren't actually interesting to look at.
  if (isa<ConstantData>(Ptr))
    return;

  SmallVector<const Value *, 2> Users;
  Users.push_back(Ptr);

  // Add PHIs that are equivalent to Ptr to Users.
  if (const PHINode *PN = dyn_cast<PHINode>(Ptr))
    getEquivalentPHIs(*PN, Users);

  do {
    Ptr = Users.pop_back_val();
    for (const User *U : Ptr->users()) {
      if (isa<ReturnInst>(U) || GetBasicARCInstKind(U) == ARCInstKind::RetainRV)
        return;
      if (isa<BitCastInst>(U))
        Users.push_back(U);
    }
  } while (!Users.empty());

  Changed = true;
  ++NumPeeps;

  LLVM_DEBUG(
      dbgs() << "Transforming objc_autoreleaseReturnValue => "
                "objc_autorelease since its operand is not used as a return "
                "value.\n"
                "Old = "
             << *AutoreleaseRV << "\n");

  CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
  Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease);
  AutoreleaseRVCI->setCalledFunction(NewDecl);
  AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
  Class = ARCInstKind::Autorelease;

  LLVM_DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n");
}
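
// Hypothetical example: in
//
//   %v = call i8* @llvm.objc.autoreleaseReturnValue(i8* %obj)
//   store i8* %v, i8** %slot
//
// %obj is never returned and never feeds a retainRV, so the return-value
// handshake can never fire; the call is downgraded to plain
// @llvm.objc.autorelease and its tail-call marker is cleared.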

/// Visit each call, one at a time, and make simplifications without doing any
/// additional analysis.
void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
  LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeIndividualCalls ==\n");
  // Reset all the flags in preparation for recomputing them.
  UsedInThisFunction = 0;

  // Store any delayed AutoreleaseRV intrinsics, so they can be easily paired
  // with RetainRV and UnsafeClaimRV.
  Instruction *DelayedAutoreleaseRV = nullptr;
  const Value *DelayedAutoreleaseRVArg = nullptr;
  auto setDelayedAutoreleaseRV = [&](Instruction *AutoreleaseRV) {
    assert(!DelayedAutoreleaseRV || !AutoreleaseRV);
    DelayedAutoreleaseRV = AutoreleaseRV;
    DelayedAutoreleaseRVArg = nullptr;
  };
  auto optimizeDelayedAutoreleaseRV = [&]() {
    if (!DelayedAutoreleaseRV)
      return;
    OptimizeIndividualCallImpl(F, DelayedAutoreleaseRV,
                               ARCInstKind::AutoreleaseRV,
                               DelayedAutoreleaseRVArg);
    setDelayedAutoreleaseRV(nullptr);
  };
  auto shouldDelayAutoreleaseRV = [&](Instruction *NonARCInst) {
    // Nothing to delay, but we may as well skip the logic below.
    if (!DelayedAutoreleaseRV)
      return true;

    // If we hit the end of the basic block we're not going to find an RV-pair.
    // Stop delaying.
    if (NonARCInst->isTerminator())
      return false;

    // Given the frontend rules for emitting AutoreleaseRV, RetainRV, and
    // UnsafeClaimRV, it's probably safe to skip over even opaque function calls
    // here since OptimizeInlinedAutoreleaseRVCall will confirm that they
    // have the same RCIdentityRoot.  However, what really matters is
    // skipping instructions or intrinsics that the inliner could leave behind;
    // be conservative for now and don't skip over opaque calls, which could
    // potentially include other ARC calls.
    auto *CB = dyn_cast<CallBase>(NonARCInst);
    if (!CB)
      return true;
    return CB->getIntrinsicID() != Intrinsic::not_intrinsic;
  };

  // Visit all objc_* calls in F.
  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
    Instruction *Inst = &*I++;

    if (auto *CI = dyn_cast<CallInst>(Inst))
      if (objcarc::hasAttachedCallOpBundle(CI)) {
        BundledInsts->insertRVCall(&*I, CI);
        Changed = true;
      }

    ARCInstKind Class = GetBasicARCInstKind(Inst);

    // Skip this loop if this instruction isn't itself an ARC intrinsic.
    const Value *Arg = nullptr;
    switch (Class) {
    default:
      optimizeDelayedAutoreleaseRV();
      break;
    case ARCInstKind::CallOrUser:
    case ARCInstKind::User:
    case ARCInstKind::None:
      // This is a non-ARC instruction.  If we're delaying an AutoreleaseRV,
      // check if it's safe to skip over it; if not, optimize the AutoreleaseRV
      // now.
      if (!shouldDelayAutoreleaseRV(Inst))
        optimizeDelayedAutoreleaseRV();
      continue;
    case ARCInstKind::AutoreleaseRV:
      optimizeDelayedAutoreleaseRV();
      setDelayedAutoreleaseRV(Inst);
      continue;
    case ARCInstKind::RetainRV:
    case ARCInstKind::UnsafeClaimRV:
      if (DelayedAutoreleaseRV) {
        // We have a potential RV pair.  Check if they cancel out.
        if (OptimizeInlinedAutoreleaseRVCall(F, Inst, Arg, Class,
                                             DelayedAutoreleaseRV,
                                             DelayedAutoreleaseRVArg)) {
          setDelayedAutoreleaseRV(nullptr);
          continue;
        }
        optimizeDelayedAutoreleaseRV();
      }
      break;
    }

    OptimizeIndividualCallImpl(F, Inst, Class, Arg);
  }

  // Catch the final delayed AutoreleaseRV.
  optimizeDelayedAutoreleaseRV();
}

/// This function returns true if the value is inert. An ObjC ARC runtime call
/// taking an inert operand can be safely deleted.
static bool isInertARCValue(Value *V, SmallPtrSet<Value *, 1> &VisitedPhis) {
  V = V->stripPointerCasts();

  if (IsNullOrUndef(V))
    return true;

  // See if this is a global variable annotated with the 'objc_arc_inert'
  // attribute.
  if (auto *GV = dyn_cast<GlobalVariable>(V))
    if (GV->hasAttribute("objc_arc_inert"))
      return true;

  if (auto PN = dyn_cast<PHINode>(V)) {
    // Ignore this phi if it has already been discovered.
    if (!VisitedPhis.insert(PN).second)
      return true;
    // Look through the phi's operands.
    for (Value *Opnd : PN->incoming_values())
      if (!isInertARCValue(Opnd, VisitedPhis))
        return false;
    return true;
  }

  return false;
}
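
// Hypothetical example: a runtime call on a value that is provably inert is
// deleted outright, e.g.
//
//   %p = phi i8* [ null, %bb1 ], [ @g, %bb2 ]
//   call i8* @llvm.objc.retain(i8* %p)   ; removable when @g carries the
//                                        ; "objc_arc_inert" attribute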

void ObjCARCOpt::OptimizeIndividualCallImpl(Function &F, Instruction *Inst,
                                            ARCInstKind Class,
                                            const Value *Arg) {
  LLVM_DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n");

  // We can delete this call if it takes an inert value.
  SmallPtrSet<Value *, 1> VisitedPhis;

  if (BundledInsts->contains(Inst)) {
    UsedInThisFunction |= 1 << unsigned(Class);
    return;
  }

  if (IsNoopOnGlobal(Class))
    if (isInertARCValue(Inst->getOperand(0), VisitedPhis)) {
      if (!Inst->getType()->isVoidTy())
        Inst->replaceAllUsesWith(Inst->getOperand(0));
      Inst->eraseFromParent();
      Changed = true;
      return;
    }

  switch (Class) {
  default:
    break;

  // Delete no-op casts. These function calls have special semantics, but
  // the semantics are entirely implemented via lowering in the front-end,
  // so by the time they reach the optimizer, they are just no-op calls
  // which return their argument.
  //
  // There are gray areas here, as the ability to cast reference-counted
  // pointers to raw void* and back allows code to break ARC assumptions,
  // however these are currently considered to be unimportant.
  case ARCInstKind::NoopCast:
    Changed = true;
    ++NumNoops;
    LLVM_DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n");
    EraseInstruction(Inst);
    return;

  // If the pointer-to-weak-pointer is null, it's undefined behavior.
  case ARCInstKind::StoreWeak:
  case ARCInstKind::LoadWeak:
  case ARCInstKind::LoadWeakRetained:
  case ARCInstKind::InitWeak:
  case ARCInstKind::DestroyWeak: {
    CallInst *CI = cast<CallInst>(Inst);
    if (IsNullOrUndef(CI->getArgOperand(0))) {
      Changed = true;
      new StoreInst(ConstantInt::getTrue(CI->getContext()),
                    PoisonValue::get(PointerType::getUnqual(CI->getContext())),
                    CI);
      Value *NewValue = PoisonValue::get(CI->getType());
      LLVM_DEBUG(
          dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
                    "\nOld = "
                 << *CI << "\nNew = " << *NewValue << "\n");
      CI->replaceAllUsesWith(NewValue);
      CI->eraseFromParent();
      return;
    }
    break;
  }
  case ARCInstKind::CopyWeak:
  case ARCInstKind::MoveWeak: {
    CallInst *CI = cast<CallInst>(Inst);
    if (IsNullOrUndef(CI->getArgOperand(0)) ||
        IsNullOrUndef(CI->getArgOperand(1))) {
      Changed = true;
      new StoreInst(ConstantInt::getTrue(CI->getContext()),
                    PoisonValue::get(PointerType::getUnqual(CI->getContext())),
                    CI);

      Value *NewValue = PoisonValue::get(CI->getType());
      LLVM_DEBUG(
          dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
                    "\nOld = "
                 << *CI << "\nNew = " << *NewValue << "\n");

      CI->replaceAllUsesWith(NewValue);
      CI->eraseFromParent();
      return;
    }
    break;
  }
  case ARCInstKind::RetainRV:
    if (OptimizeRetainRVCall(F, Inst))
      return;
    break;
  case ARCInstKind::AutoreleaseRV:
    OptimizeAutoreleaseRVCall(F, Inst, Class);
    break;
  }

  // objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
  if (IsAutorelease(Class) && Inst->use_empty()) {
    CallInst *Call = cast<CallInst>(Inst);
    const Value *Arg = Call->getArgOperand(0);
    Arg = FindSingleUseIdentifiedObject(Arg);
    if (Arg) {
      Changed = true;
      ++NumAutoreleases;

      // Create the declaration lazily.
      LLVMContext &C = Inst->getContext();

      Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
      CallInst *NewCall =
          CallInst::Create(Decl, Call->getArgOperand(0), "", Call);
      NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease),
                           MDNode::get(C, std::nullopt));

      LLVM_DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) "
                           "since x is otherwise unused.\nOld: "
                        << *Call << "\nNew: " << *NewCall << "\n");

      EraseInstruction(Call);
      Inst = NewCall;
      Class = ARCInstKind::Release;
    }
  }

  // For functions which can never be passed stack arguments, add
  // a tail keyword.
  if (IsAlwaysTail(Class) && !cast<CallInst>(Inst)->isNoTailCall()) {
    Changed = true;
    LLVM_DEBUG(
        dbgs() << "Adding tail keyword to function since it can never be "
                  "passed stack args: "
               << *Inst << "\n");
    cast<CallInst>(Inst)->setTailCall();
  }

  // Ensure that functions that can never have a "tail" keyword due to the
  // semantics of ARC truly do not do so.
  if (IsNeverTail(Class)) {
    Changed = true;
    LLVM_DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst
                      << "\n");
    cast<CallInst>(Inst)->setTailCall(false);
  }

  // Set nounwind as needed.
  if (IsNoThrow(Class)) {
    Changed = true;
    LLVM_DEBUG(dbgs() << "Found no throw class. Setting nounwind on: " << *Inst
                      << "\n");
    cast<CallInst>(Inst)->setDoesNotThrow();
  }

  // Note: This catches instructions unrelated to ARC.
  if (!IsNoopOnNull(Class)) {
    UsedInThisFunction |= 1 << unsigned(Class);
    return;
  }

  // If we haven't already looked up the root, look it up now.
  if (!Arg)
    Arg = GetArgRCIdentityRoot(Inst);

  // ARC calls with null are no-ops. Delete them.
  if (IsNullOrUndef(Arg)) {
    Changed = true;
    ++NumNoops;
    LLVM_DEBUG(dbgs() << "ARC calls with null are no-ops. Erasing: " << *Inst
                      << "\n");
    EraseInstruction(Inst);
    return;
  }

  // Keep track of which of retain, release, autorelease, and retain_block
  // are actually present in this function.
  UsedInThisFunction |= 1 << unsigned(Class);

  // If Arg is a PHI, one or more of its incoming values are null, the call is
  // control-equivalent to the PHI, there are no relevant side effects between
  // the PHI and the call, and (for releases) the call carries the
  // clang.imprecise_release tag, then the call can be pushed up onto just the
  // paths with non-null incoming values. For now, don't bother splitting
  // critical edges for this.
  if (Class == ARCInstKind::Release &&
      !Inst->getMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease)))
    return;

  SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist;
  Worklist.push_back(std::make_pair(Inst, Arg));
  do {
    std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val();
    Inst = Pair.first;
    Arg = Pair.second;

    const PHINode *PN = dyn_cast<PHINode>(Arg);
    if (!PN)
      continue;

    // Determine if the PHI has any null operands, or any incoming
    // critical edges.
    bool HasNull = false;
    bool HasCriticalEdges = false;
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      Value *Incoming = GetRCIdentityRoot(PN->getIncomingValue(i));
      if (IsNullOrUndef(Incoming))
        HasNull = true;
      else if (PN->getIncomingBlock(i)->getTerminator()->getNumSuccessors() !=
               1) {
        HasCriticalEdges = true;
        break;
      }
    }
    // If we have null operands and no critical edges, optimize.
    if (HasCriticalEdges)
      continue;
    if (!HasNull)
      continue;

    Instruction *DepInst = nullptr;

    // Check that there is nothing that cares about the reference
    // count between the call and the phi.
    switch (Class) {
    case ARCInstKind::Retain:
    case ARCInstKind::RetainBlock:
      // These can always be moved up.
      break;
    case ARCInstKind::Release:
      // These can't be moved across things that care about the retain
      // count.
      DepInst = findSingleDependency(NeedsPositiveRetainCount, Arg,
                                     Inst->getParent(), Inst, PA);
      break;
    case ARCInstKind::Autorelease:
      // These can't be moved across autorelease pool scope boundaries.
      DepInst = findSingleDependency(AutoreleasePoolBoundary, Arg,
                                     Inst->getParent(), Inst, PA);
      break;
    case ARCInstKind::UnsafeClaimRV:
    case ARCInstKind::RetainRV:
    case ARCInstKind::AutoreleaseRV:
      // Don't move these; the RV optimization depends on the autoreleaseRV
      // being tail called, and the retainRV being immediately after a call
      // (which might still happen if we get lucky with codegen layout, but
      // it's not worth taking the chance).
      continue;
    default:
      llvm_unreachable("Invalid dependence flavor");
    }

    if (DepInst != PN)
      continue;

    Changed = true;
    ++NumPartialNoops;
    // Clone the call into each predecessor that has a non-null value.
    CallInst *CInst = cast<CallInst>(Inst);
    Type *ParamTy = CInst->getArgOperand(0)->getType();
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      Value *Incoming = GetRCIdentityRoot(PN->getIncomingValue(i));
      if (IsNullOrUndef(Incoming))
        continue;
      Value *Op = PN->getIncomingValue(i);
      Instruction *InsertPos = &PN->getIncomingBlock(i)->back();
      SmallVector<OperandBundleDef, 1> OpBundles;
      cloneOpBundlesIf(CInst, OpBundles, [](const OperandBundleUse &B) {
        return B.getTagID() != LLVMContext::OB_funclet;
      });
      addOpBundleForFunclet(InsertPos->getParent(), OpBundles);
      CallInst *Clone = CallInst::Create(CInst, OpBundles);
      if (Op->getType() != ParamTy)
        Op = new BitCastInst(Op, ParamTy, "", InsertPos);
      Clone->setArgOperand(0, Op);
      Clone->insertBefore(InsertPos);

      LLVM_DEBUG(dbgs() << "Cloning " << *CInst << "\n"
                                                   "And inserting clone at "
                        << *InsertPos << "\n");
      Worklist.push_back(std::make_pair(Clone, Incoming));
    }
    // Erase the original call.
    LLVM_DEBUG(dbgs() << "Erasing: " << *CInst << "\n");
    EraseInstruction(CInst);
  } while (!Worklist.empty());
}
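
// Hypothetical example: for
//
//   %x = phi i8* [ null, %bb1 ], [ %obj, %bb2 ]
//   call void @llvm.objc.release(i8* %x), !clang.imprecise_release !0
//
// the release is a no-op along the %bb1 path, so a clone operating on %obj is
// inserted at the end of %bb2 and the original call is erased.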

/// If we have a top down pointer in the S_Use state, make sure that there are
/// no CFG hazards by checking the states of various bottom up pointers.
static void CheckForUseCFGHazard(const Sequence SuccSSeq,
                                 const bool SuccSRRIKnownSafe,
                                 TopDownPtrState &S,
                                 bool &SomeSuccHasSame,
                                 bool &AllSuccsHaveSame,
                                 bool &NotAllSeqEqualButKnownSafe,
                                 bool &ShouldContinue) {
  switch (SuccSSeq) {
  case S_CanRelease: {
    if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) {
      S.ClearSequenceProgress();
      break;
    }
    S.SetCFGHazardAfflicted(true);
    ShouldContinue = true;
    break;
  }
  case S_Use:
    SomeSuccHasSame = true;
    break;
  case S_Stop:
  case S_MovableRelease:
    if (!S.IsKnownSafe() && !SuccSRRIKnownSafe)
      AllSuccsHaveSame = false;
    else
      NotAllSeqEqualButKnownSafe = true;
    break;
  case S_Retain:
    llvm_unreachable("bottom-up pointer in retain state!");
  case S_None:
    llvm_unreachable("This should have been handled earlier.");
  }
}

/// If we have a top down pointer in the S_CanRelease state, make sure that
/// there are no CFG hazards by checking the states of various bottom up
/// pointers.
static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq,
                                        const bool SuccSRRIKnownSafe,
                                        TopDownPtrState &S,
                                        bool &SomeSuccHasSame,
                                        bool &AllSuccsHaveSame,
                                        bool &NotAllSeqEqualButKnownSafe) {
  switch (SuccSSeq) {
  case S_CanRelease:
    SomeSuccHasSame = true;
    break;
  case S_Stop:
  case S_MovableRelease:
  case S_Use:
    if (!S.IsKnownSafe() && !SuccSRRIKnownSafe)
      AllSuccsHaveSame = false;
    else
      NotAllSeqEqualButKnownSafe = true;
    break;
  case S_Retain:
    llvm_unreachable("bottom-up pointer in retain state!");
  case S_None:
    llvm_unreachable("This should have been handled earlier.");
  }
}

/// Check for critical edges, loop boundaries, irreducible control flow, or
/// other CFG structures where moving code across the edge would result in it
/// being executed more often.
void
ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
                               DenseMap<const BasicBlock *, BBState> &BBStates,
                               BBState &MyStates) const {
  // If any top-down local-use or possible-dec has a succ which is earlier in
  // the sequence, forget it.
  for (auto I = MyStates.top_down_ptr_begin(), E = MyStates.top_down_ptr_end();
       I != E; ++I) {
    TopDownPtrState &S = I->second;
    const Sequence Seq = I->second.GetSeq();

    // We only care about S_Retain, S_CanRelease, and S_Use.
    if (Seq == S_None)
      continue;

    // Make sure that, if extra top down states are added in the future, this
    // code is updated to handle them.
    assert((Seq == S_Retain || Seq == S_CanRelease || Seq == S_Use) &&
           "Unknown top down sequence state.");

    const Value *Arg = I->first;
    bool SomeSuccHasSame = false;
    bool AllSuccsHaveSame = true;
    bool NotAllSeqEqualButKnownSafe = false;

    for (const BasicBlock *Succ : successors(BB)) {
      // If VisitBottomUp has pointer information for this successor, take
      // what we know about it.
      const DenseMap<const BasicBlock *, BBState>::iterator BBI =
          BBStates.find(Succ);
      assert(BBI != BBStates.end());
      const BottomUpPtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
      const Sequence SuccSSeq = SuccS.GetSeq();

      // If, bottom up, the pointer is in an S_None state, clear the sequence
      // progress, since the sequence in the bottom up state finished,
      // suggesting a mismatch between retains and releases. This is true for
      // all three cases that we are handling here: S_Retain, S_Use, and
      // S_CanRelease.
      if (SuccSSeq == S_None) {
        S.ClearSequenceProgress();
        continue;
      }

      // If we have S_Use or S_CanRelease, perform our CFG hazard checks.
      const bool SuccSRRIKnownSafe = SuccS.IsKnownSafe();

      // *NOTE* We do not use Seq from above here since we are allowing for
      // S.GetSeq() to change while we are visiting basic blocks.
      switch(S.GetSeq()) {
      case S_Use: {
        bool ShouldContinue = false;
        CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S, SomeSuccHasSame,
                             AllSuccsHaveSame, NotAllSeqEqualButKnownSafe,
                             ShouldContinue);
        if (ShouldContinue)
          continue;
        break;
      }
      case S_CanRelease:
        CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S,
                                    SomeSuccHasSame, AllSuccsHaveSame,
                                    NotAllSeqEqualButKnownSafe);
        break;
      case S_Retain:
      case S_None:
      case S_Stop:
      case S_MovableRelease:
        break;
      }
    }

    // If the state at the other end of any of the successor edges
    // matches the current state, require all edges to match. This
    // guards against loops in the middle of a sequence.
    if (SomeSuccHasSame && !AllSuccsHaveSame) {
      S.ClearSequenceProgress();
    } else if (NotAllSeqEqualButKnownSafe) {
      // If we would have cleared the state but for the fact that we are known
      // safe, stop code motion. Whether it is safe to remove RR pairs via
      // KnownSafe is orthogonal to whether we are allowed to perform code
      // motion.
      S.SetCFGHazardAfflicted(true);
    }
  }
}
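
// Illustrative hazard (not from the source), in pseudo-code:
//
//   objc_retain(%x);  loop { use(%x); }  objc_release(%x);
//
// sinking the retain into the loop (or hoisting the release into it) would
// execute the moved call once per iteration instead of once, so such pairs
// are flagged as CFG-hazard afflicted instead of being moved.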

bool ObjCARCOpt::VisitInstructionBottomUp(
    Instruction *Inst, BasicBlock *BB, BlotMapVector<Value *, RRInfo> &Retains,
    BBState &MyStates) {
  bool NestingDetected = false;
  ARCInstKind Class = GetARCInstKind(Inst);
  const Value *Arg = nullptr;

  LLVM_DEBUG(dbgs() << "        Class: " << Class << "\n");

  switch (Class) {
  case ARCInstKind::Release: {
    Arg = GetArgRCIdentityRoot(Inst);

    BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
    NestingDetected |= S.InitBottomUp(MDKindCache, Inst);
    break;
  }
  case ARCInstKind::RetainBlock:
    // In OptimizeIndividualCalls, we have strength reduced all optimizable
    // objc_retainBlocks to objc_retains. Thus at this point any
    // objc_retainBlocks that we see are not optimizable.
    break;
  case ARCInstKind::Retain:
  case ARCInstKind::RetainRV: {
    Arg = GetArgRCIdentityRoot(Inst);
    BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
    if (S.MatchWithRetain()) {
      // Don't do retain+release tracking for ARCInstKind::RetainRV, because
      // it's better to let it remain as the first instruction after a call.
      if (Class != ARCInstKind::RetainRV) {
        LLVM_DEBUG(dbgs() << "        Matching with: " << *Inst << "\n");
        Retains[Inst] = S.GetRRInfo();
      }
      S.ClearSequenceProgress();
    }
    // A retain moving bottom up can be a use.
    break;
  }
  case ARCInstKind::AutoreleasepoolPop:
    // Conservatively, clear MyStates for all known pointers.
    MyStates.clearBottomUpPointers();
    return NestingDetected;
  case ARCInstKind::AutoreleasepoolPush:
  case ARCInstKind::None:
    // These are irrelevant.
    return NestingDetected;
  default:
    break;
  }

  // Consider any other possible effects of this instruction on each
  // pointer being tracked.
  for (auto MI = MyStates.bottom_up_ptr_begin(),
            ME = MyStates.bottom_up_ptr_end();
       MI != ME; ++MI) {
    const Value *Ptr = MI->first;
    if (Ptr == Arg)
      continue; // Handled above.
    BottomUpPtrState &S = MI->second;

    if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class))
      continue;

    S.HandlePotentialUse(BB, Inst, Ptr, PA, Class);
  }

  return NestingDetected;
}

bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
                               DenseMap<const BasicBlock *, BBState> &BBStates,
                               BlotMapVector<Value *, RRInfo> &Retains) {
  LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n");

  bool NestingDetected = false;
  BBState &MyStates = BBStates[BB];

  // Merge the states from each successor to compute the initial state
  // for the current block.
  BBState::edge_iterator SI(MyStates.succ_begin()),
                         SE(MyStates.succ_end());
  if (SI != SE) {
    const BasicBlock *Succ = *SI;
    DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
    assert(I != BBStates.end());
    MyStates.InitFromSucc(I->second);
    ++SI;
    for (; SI != SE; ++SI) {
      Succ = *SI;
      I = BBStates.find(Succ);
      assert(I != BBStates.end());
      MyStates.MergeSucc(I->second);
    }
  }

  LLVM_DEBUG(dbgs() << "Before:\n"
                    << BBStates[BB] << "\n"
                    << "Performing Dataflow:\n");

  // Visit all the instructions, bottom-up.
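  // Note that I always points one past the instruction being visited, so the
  // loop walks from the terminator back to the first instruction without ever
  // decrementing begin().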
  for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
    Instruction *Inst = &*std::prev(I);

    // Invoke instructions are visited as part of their successors (below).
    if (isa<InvokeInst>(Inst))
      continue;

    LLVM_DEBUG(dbgs() << "    Visiting " << *Inst << "\n");

    NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);

    // Bail out if the number of pointers being tracked becomes too large so
    // that this pass can complete in a reasonable amount of time.
    if (MyStates.bottom_up_ptr_list_size() > MaxPtrStates) {
      DisableRetainReleasePairing = true;
      return false;
    }
  }

  // If there's a predecessor with an invoke, visit the invoke as if it were
  // part of this block, since we can't insert code after an invoke in its own
  // block, and we don't want to split critical edges.
  for (BBState::edge_iterator PI(MyStates.pred_begin()),
       PE(MyStates.pred_end()); PI != PE; ++PI) {
    BasicBlock *Pred = *PI;
    if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back()))
      NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
  }

  LLVM_DEBUG(dbgs() << "\nFinal State:\n" << BBStates[BB] << "\n");

  return NestingDetected;
}

// Fill ReleaseInsertPtToRCIdentityRoots, which is a map from insertion points
// to the set of RC identity roots that would be released by the release calls
// moved to the insertion points.
static void collectReleaseInsertPts(
    const BlotMapVector<Value *, RRInfo> &Retains,
    DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
        &ReleaseInsertPtToRCIdentityRoots) {
  for (const auto &P : Retains) {
    // Retains is a map from an objc_retain call to a RRInfo of the RC identity
    // root of the call. Get the RC identity root of the objc_retain call.
    Instruction *Retain = cast<Instruction>(P.first);
    Value *Root = GetRCIdentityRoot(Retain->getOperand(0));
    // Collect all the insertion points of the objc_release calls that release
    // the RC identity root of the objc_retain call.
    for (const Instruction *InsertPt : P.second.ReverseInsertPts)
      ReleaseInsertPtToRCIdentityRoots[InsertPt].insert(Root);
  }
}

// Get the RC identity roots from an insertion point of an objc_release call.
// Return nullptr if the passed instruction isn't an insertion point.
static const SmallPtrSet<const Value *, 2> *
getRCIdentityRootsFromReleaseInsertPt(
    const Instruction *InsertPt,
    const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
        &ReleaseInsertPtToRCIdentityRoots) {
  auto I = ReleaseInsertPtToRCIdentityRoots.find(InsertPt);
  if (I == ReleaseInsertPtToRCIdentityRoots.end())
    return nullptr;
  return &I->second;
}

bool ObjCARCOpt::VisitInstructionTopDown(
    Instruction *Inst, DenseMap<Value *, RRInfo> &Releases, BBState &MyStates,
    const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
        &ReleaseInsertPtToRCIdentityRoots) {
  bool NestingDetected = false;
  ARCInstKind Class = GetARCInstKind(Inst);
  const Value *Arg = nullptr;

  // Make sure a call to objc_retain isn't moved past insertion points of calls
  // to objc_release.
  if (const SmallPtrSet<const Value *, 2> *Roots =
          getRCIdentityRootsFromReleaseInsertPt(
              Inst, ReleaseInsertPtToRCIdentityRoots))
    for (const auto *Root : *Roots) {
      TopDownPtrState &S = MyStates.getPtrTopDownState(Root);
      // Disable code motion if the current position is S_Retain to prevent
      // moving the objc_retain call past objc_release calls. If it's
      // S_CanRelease or larger, it's not necessary to disable code motion as
      // the insertion points that prevent the objc_retain call from moving down
      // should have been set already.
      if (S.GetSeq() == S_Retain)
        S.SetCFGHazardAfflicted(true);
    }

  LLVM_DEBUG(dbgs() << "        Class: " << Class << "\n");

  switch (Class) {
  case ARCInstKind::RetainBlock:
    // In OptimizeIndividualCalls, we have strength reduced all optimizable
    // objc_retainBlocks to objc_retains. Thus at this point any
    // objc_retainBlocks that we see are not optimizable. We need to break since
    // a retain can be a potential use.
    break;
  case ARCInstKind::Retain:
  case ARCInstKind::RetainRV: {
    Arg = GetArgRCIdentityRoot(Inst);
    TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
    NestingDetected |= S.InitTopDown(Class, Inst);
    // A retain can be a potential use; proceed to the generic checking
    // code below.
    break;
  }
  case ARCInstKind::Release: {
    Arg = GetArgRCIdentityRoot(Inst);
    TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
    // Try to form a tentative pair in between this release instruction and the
    // top down pointers that we are tracking.
    if (S.MatchWithRelease(MDKindCache, Inst)) {
      // If we succeed, copy S's RRInfo into the Release -> {Retain Set
      // Map}. Then we clear S.
      LLVM_DEBUG(dbgs() << "        Matching with: " << *Inst << "\n");
      Releases[Inst] = S.GetRRInfo();
      S.ClearSequenceProgress();
    }
    break;
  }
  case ARCInstKind::AutoreleasepoolPop:
    // Conservatively, clear MyStates for all known pointers.
    MyStates.clearTopDownPointers();
    return false;
  case ARCInstKind::AutoreleasepoolPush:
  case ARCInstKind::None:
    // These cannot be uses of the pointers we are tracking.
    return false;
  default:
    break;
  }

  // Consider any other possible effects of this instruction on each
  // pointer being tracked.
  for (auto MI = MyStates.top_down_ptr_begin(),
            ME = MyStates.top_down_ptr_end();
       MI != ME; ++MI) {
    const Value *Ptr = MI->first;
    if (Ptr == Arg)
      continue; // Handled above.
    TopDownPtrState &S = MI->second;
    if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class, *BundledInsts))
      continue;

    S.HandlePotentialUse(Inst, Ptr, PA, Class);
  }

  return NestingDetected;
}

bool ObjCARCOpt::VisitTopDown(
    BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates,
    DenseMap<Value *, RRInfo> &Releases,
    const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
        &ReleaseInsertPtToRCIdentityRoots) {
  LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::VisitTopDown ==\n");
  bool NestingDetected = false;
  BBState &MyStates = BBStates[BB];

  // Merge the states from each predecessor to compute the initial state
  // for the current block.
  BBState::edge_iterator PI(MyStates.pred_begin()),
                         PE(MyStates.pred_end());
  if (PI != PE) {
    const BasicBlock *Pred = *PI;
    DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
    assert(I != BBStates.end());
    MyStates.InitFromPred(I->second);
    ++PI;
    for (; PI != PE; ++PI) {
      Pred = *PI;
      I = BBStates.find(Pred);
      assert(I != BBStates.end());
      MyStates.MergePred(I->second);
    }
  }

  // Check that BB and MyStates have the same number of predecessors. This
  // prevents retain calls that live outside a loop from being moved into the
  // loop.
  if (!BB->hasNPredecessors(MyStates.pred_end() - MyStates.pred_begin()))
    for (auto I = MyStates.top_down_ptr_begin(),
              E = MyStates.top_down_ptr_end();
         I != E; ++I)
      I->second.SetCFGHazardAfflicted(true);

  LLVM_DEBUG(dbgs() << "Before:\n"
                    << BBStates[BB] << "\n"
                    << "Performing Dataflow:\n");

  // Visit all the instructions, top-down.
  for (Instruction &Inst : *BB) {
    LLVM_DEBUG(dbgs() << "    Visiting " << Inst << "\n");

    NestingDetected |= VisitInstructionTopDown(
        &Inst, Releases, MyStates, ReleaseInsertPtToRCIdentityRoots);

    // Bail out if the number of pointers being tracked becomes too large so
    // that this pass can complete in a reasonable amount of time.
    if (MyStates.top_down_ptr_list_size() > MaxPtrStates) {
      DisableRetainReleasePairing = true;
      return false;
    }
  }

  LLVM_DEBUG(dbgs() << "\nState Before Checking for CFG Hazards:\n"
                    << BBStates[BB] << "\n\n");
  CheckForCFGHazards(BB, BBStates, MyStates);
  LLVM_DEBUG(dbgs() << "Final State:\n" << BBStates[BB] << "\n");
  return NestingDetected;
}

static void
ComputePostOrders(Function &F,
                  SmallVectorImpl<BasicBlock *> &PostOrder,
                  SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder,
                  unsigned NoObjCARCExceptionsMDKind,
                  DenseMap<const BasicBlock *, BBState> &BBStates) {
  /// The visited set, for doing DFS walks.
  SmallPtrSet<BasicBlock *, 16> Visited;

  // Do DFS, computing the PostOrder.
  SmallPtrSet<BasicBlock *, 16> OnStack;
  SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;

  // Functions always have exactly one entry block, and we don't have
  // any other block that we treat like an entry block.
  BasicBlock *EntryBB = &F.getEntryBlock();
  BBState &MyStates = BBStates[EntryBB];
  MyStates.SetAsEntry();
  Instruction *EntryTI = EntryBB->getTerminator();
  SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI)));
  Visited.insert(EntryBB);
  OnStack.insert(EntryBB);
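  // Iterative DFS using an explicit stack: each SuccStack entry pairs a block
  // on the current path with an iterator marking how far through that block's
  // successor list the walk has progressed. The goto below resumes the
  // parent's successor scan after a child has been pushed.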
  do {
  dfs_next_succ:
    BasicBlock *CurrBB = SuccStack.back().first;
    succ_iterator SE(CurrBB->getTerminator(), false);

    while (SuccStack.back().second != SE) {
      BasicBlock *SuccBB = *SuccStack.back().second++;
      if (Visited.insert(SuccBB).second) {
        SuccStack.push_back(
            std::make_pair(SuccBB, succ_iterator(SuccBB->getTerminator())));
        BBStates[CurrBB].addSucc(SuccBB);
        BBState &SuccStates = BBStates[SuccBB];
        SuccStates.addPred(CurrBB);
        OnStack.insert(SuccBB);
        goto dfs_next_succ;
      }

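      // SuccBB was already visited. If it is no longer on the DFS stack, this
      // is a forward or cross edge, so record it; edges to blocks still on
      // the stack are back edges and are deliberately left out of the
      // succ/pred lists, which is how cycles are broken for both traversals.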
      if (!OnStack.count(SuccBB)) {
        BBStates[CurrBB].addSucc(SuccBB);
        BBStates[SuccBB].addPred(CurrBB);
      }
    }
    OnStack.erase(CurrBB);
    PostOrder.push_back(CurrBB);
    SuccStack.pop_back();
  } while (!SuccStack.empty());

  Visited.clear();

  // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
  // Functions may have many exits, and there are also blocks which we treat
  // as exits due to ignored edges.
  SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack;
  for (BasicBlock &ExitBB : F) {
    BBState &MyStates = BBStates[&ExitBB];
    if (!MyStates.isExit())
      continue;

    MyStates.SetAsExit();

    PredStack.push_back(std::make_pair(&ExitBB, MyStates.pred_begin()));
    Visited.insert(&ExitBB);
    while (!PredStack.empty()) {
    reverse_dfs_next_succ:
      BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
      while (PredStack.back().second != PE) {
        BasicBlock *BB = *PredStack.back().second++;
        if (Visited.insert(BB).second) {
          PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin()));
          goto reverse_dfs_next_succ;
        }
      }
      ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
    }
  }
}

// Visit the function both top-down and bottom-up.
bool ObjCARCOpt::Visit(Function &F,
                       DenseMap<const BasicBlock *, BBState> &BBStates,
                       BlotMapVector<Value *, RRInfo> &Retains,
                       DenseMap<Value *, RRInfo> &Releases) {
  // Use reverse-postorder traversals, because we magically know that loops
  // will be well behaved, i.e. they won't repeatedly call retain on a single
  // pointer without doing a release. We can't use the ReversePostOrderTraversal
  // class here because we want the reverse-CFG postorder to consider each
  // function exit point, and we want to ignore selected cycle edges.
  SmallVector<BasicBlock *, 16> PostOrder;
  SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
  ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
                    MDKindCache.get(ARCMDKindID::NoObjCARCExceptions),
                    BBStates);

  // Use reverse-postorder on the reverse CFG for bottom-up.
  bool BottomUpNestingDetected = false;
  for (BasicBlock *BB : llvm::reverse(ReverseCFGPostOrder)) {
    BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
    if (DisableRetainReleasePairing)
      return false;
  }

  DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
      ReleaseInsertPtToRCIdentityRoots;
  collectReleaseInsertPts(Retains, ReleaseInsertPtToRCIdentityRoots);

  // Use reverse-postorder for top-down.
  bool TopDownNestingDetected = false;
  for (BasicBlock *BB : llvm::reverse(PostOrder)) {
    TopDownNestingDetected |=
        VisitTopDown(BB, BBStates, Releases, ReleaseInsertPtToRCIdentityRoots);
    if (DisableRetainReleasePairing)
      return false;
  }

  return TopDownNestingDetected && BottomUpNestingDetected;
}

/// Move the calls in RetainsToMove and ReleasesToMove.
void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove,
                           RRInfo &ReleasesToMove,
                           BlotMapVector<Value *, RRInfo> &Retains,
                           DenseMap<Value *, RRInfo> &Releases,
                           SmallVectorImpl<Instruction *> &DeadInsts,
                           Module *M) {
  Type *ArgTy = Arg->getType();
  Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));

  LLVM_DEBUG(dbgs() << "== ObjCARCOpt::MoveCalls ==\n");

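  // Note the crossover below: the new retain calls are created at the
  // insertion points recorded for the releases being moved, and the new
  // release calls at the points recorded for the retains.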
  // Insert the new retain and release calls.
  for (Instruction *InsertPt : ReleasesToMove.ReverseInsertPts) {
    Value *MyArg = ArgTy == ParamTy ? Arg :
                   new BitCastInst(Arg, ParamTy, "", InsertPt);
    Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
    SmallVector<OperandBundleDef, 1> BundleList;
    addOpBundleForFunclet(InsertPt->getParent(), BundleList);
    CallInst *Call = CallInst::Create(Decl, MyArg, BundleList, "", InsertPt);
    Call->setDoesNotThrow();
    Call->setTailCall();

    LLVM_DEBUG(dbgs() << "Inserting new Retain: " << *Call
                      << "\n"
                         "At insertion point: "
                      << *InsertPt << "\n");
  }
  for (Instruction *InsertPt : RetainsToMove.ReverseInsertPts) {
    Value *MyArg = ArgTy == ParamTy ? Arg :
                   new BitCastInst(Arg, ParamTy, "", InsertPt);
    Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
    SmallVector<OperandBundleDef, 1> BundleList;
    addOpBundleForFunclet(InsertPt->getParent(), BundleList);
    CallInst *Call = CallInst::Create(Decl, MyArg, BundleList, "", InsertPt);
    // Attach a clang.imprecise_release metadata tag, if appropriate.
    if (MDNode *M = ReleasesToMove.ReleaseMetadata)
      Call->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), M);
    Call->setDoesNotThrow();
    if (ReleasesToMove.IsTailCallRelease)
      Call->setTailCall();

    LLVM_DEBUG(dbgs() << "Inserting new Release: " << *Call
                      << "\n"
                         "At insertion point: "
                      << *InsertPt << "\n");
  }

  // Delete the original retain and release calls.
  for (Instruction *OrigRetain : RetainsToMove.Calls) {
    Retains.blot(OrigRetain);
    DeadInsts.push_back(OrigRetain);
    LLVM_DEBUG(dbgs() << "Deleting retain: " << *OrigRetain << "\n");
  }
  for (Instruction *OrigRelease : ReleasesToMove.Calls) {
    Releases.erase(OrigRelease);
    DeadInsts.push_back(OrigRelease);
    LLVM_DEBUG(dbgs() << "Deleting release: " << *OrigRelease << "\n");
  }
}

bool ObjCARCOpt::PairUpRetainsAndReleases(
    DenseMap<const BasicBlock *, BBState> &BBStates,
    BlotMapVector<Value *, RRInfo> &Retains,
    DenseMap<Value *, RRInfo> &Releases, Module *M,
    Instruction *Retain,
    SmallVectorImpl<Instruction *> &DeadInsts, RRInfo &RetainsToMove,
    RRInfo &ReleasesToMove, Value *Arg, bool KnownSafe,
    bool &AnyPairsCompletelyEliminated) {
  // If a pair happens in a region where it is known that the reference count
  // is already incremented, we can similarly ignore possible decrements unless
  // we are dealing with a retainable object with multiple provenance sources.
  bool KnownSafeTD = true, KnownSafeBU = true;
  bool CFGHazardAfflicted = false;

  // Connect the dots between the top-down-collected RetainsToMove and
  // bottom-up-collected ReleasesToMove to form sets of related calls.
  // This is an iterative process so that we connect multiple releases
  // to multiple retains if needed.
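  // Path-count bookkeeping: OldDelta and NewDelta track the retain-minus-
  // release balance, weighted by the number of CFG paths through each call's
  // block, for the original calls and for the proposed insertion points
  // respectively; both must net to zero for the rewrite to preserve counts.
  // OldCount and NewCount track the weighted number of retains before and
  // after, so OldCount - NewCount is the number of pairs eliminated.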
  unsigned OldDelta = 0;
  unsigned NewDelta = 0;
  unsigned OldCount = 0;
  unsigned NewCount = 0;
  bool FirstRelease = true;
  for (SmallVector<Instruction *, 4> NewRetains{Retain};;) {
    SmallVector<Instruction *, 4> NewReleases;
    for (Instruction *NewRetain : NewRetains) {
      auto It = Retains.find(NewRetain);
      assert(It != Retains.end());
      const RRInfo &NewRetainRRI = It->second;
      KnownSafeTD &= NewRetainRRI.KnownSafe;
      CFGHazardAfflicted |= NewRetainRRI.CFGHazardAfflicted;
      for (Instruction *NewRetainRelease : NewRetainRRI.Calls) {
        auto Jt = Releases.find(NewRetainRelease);
        if (Jt == Releases.end())
          return false;
        const RRInfo &NewRetainReleaseRRI = Jt->second;

        // If the release does not have a reference to the retain as well,
        // something happened which is unaccounted for. Do not do anything.
        //
        // This can happen if we catch an additive overflow during path count
        // merging.
        if (!NewRetainReleaseRRI.Calls.count(NewRetain))
          return false;

        if (ReleasesToMove.Calls.insert(NewRetainRelease).second) {
          // If we overflow when we compute the path count, don't remove/move
          // anything.
          const BBState &NRRBBState = BBStates[NewRetainRelease->getParent()];
          unsigned PathCount = BBState::OverflowOccurredValue;
          if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
            return false;
          assert(PathCount != BBState::OverflowOccurredValue &&
                 "PathCount at this point cannot be "
                 "OverflowOccurredValue.");
          OldDelta -= PathCount;

          // Merge the ReleaseMetadata and IsTailCallRelease values.
          if (FirstRelease) {
            ReleasesToMove.ReleaseMetadata =
              NewRetainReleaseRRI.ReleaseMetadata;
            ReleasesToMove.IsTailCallRelease =
              NewRetainReleaseRRI.IsTailCallRelease;
            FirstRelease = false;
          } else {
            if (ReleasesToMove.ReleaseMetadata !=
                NewRetainReleaseRRI.ReleaseMetadata)
              ReleasesToMove.ReleaseMetadata = nullptr;
            if (ReleasesToMove.IsTailCallRelease !=
                NewRetainReleaseRRI.IsTailCallRelease)
              ReleasesToMove.IsTailCallRelease = false;
          }

          // Collect the optimal insertion points.
          if (!KnownSafe)
            for (Instruction *RIP : NewRetainReleaseRRI.ReverseInsertPts) {
              if (ReleasesToMove.ReverseInsertPts.insert(RIP).second) {
                // If we overflow when we compute the path count, don't
                // remove/move anything.
                const BBState &RIPBBState = BBStates[RIP->getParent()];
                PathCount = BBState::OverflowOccurredValue;
                if (RIPBBState.GetAllPathCountWithOverflow(PathCount))
                  return false;
                assert(PathCount != BBState::OverflowOccurredValue &&
                       "PathCount at this point cannot be "
                       "OverflowOccurredValue.");
                NewDelta -= PathCount;
              }
            }
          NewReleases.push_back(NewRetainRelease);
        }
      }
    }
    NewRetains.clear();
    if (NewReleases.empty()) break;

    // Back the other way.
    for (Instruction *NewRelease : NewReleases) {
      auto It = Releases.find(NewRelease);
      assert(It != Releases.end());
      const RRInfo &NewReleaseRRI = It->second;
      KnownSafeBU &= NewReleaseRRI.KnownSafe;
      CFGHazardAfflicted |= NewReleaseRRI.CFGHazardAfflicted;
      for (Instruction *NewReleaseRetain : NewReleaseRRI.Calls) {
        auto Jt = Retains.find(NewReleaseRetain);
        if (Jt == Retains.end())
          return false;
        const RRInfo &NewReleaseRetainRRI = Jt->second;

        // If the retain does not have a reference to the release as well,
        // something happened which is unaccounted for. Do not do anything.
        //
        // This can happen if we catch an additive overflow during path count
        // merging.
        if (!NewReleaseRetainRRI.Calls.count(NewRelease))
          return false;

        if (RetainsToMove.Calls.insert(NewReleaseRetain).second) {
          // If we overflow when we compute the path count, don't remove/move
          // anything.
          const BBState &NRRBBState = BBStates[NewReleaseRetain->getParent()];
          unsigned PathCount = BBState::OverflowOccurredValue;
          if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
            return false;
          assert(PathCount != BBState::OverflowOccurredValue &&
                 "PathCount at this point cannot be "
                 "OverflowOccurredValue.");
          OldDelta += PathCount;
          OldCount += PathCount;

          // Collect the optimal insertion points.
          if (!KnownSafe)
            for (Instruction *RIP : NewReleaseRetainRRI.ReverseInsertPts) {
              if (RetainsToMove.ReverseInsertPts.insert(RIP).second) {
                // If we overflow when we compute the path count, don't
                // remove/move anything.
                const BBState &RIPBBState = BBStates[RIP->getParent()];

                PathCount = BBState::OverflowOccurredValue;
                if (RIPBBState.GetAllPathCountWithOverflow(PathCount))
                  return false;
                assert(PathCount != BBState::OverflowOccurredValue &&
                       "PathCount at this point cannot be "
                       "OverflowOccurredValue.");
                NewDelta += PathCount;
                NewCount += PathCount;
              }
            }
          NewRetains.push_back(NewReleaseRetain);
        }
      }
    }
    if (NewRetains.empty()) break;
  }

  // We can only remove pointers if we are known safe in both directions.
  bool UnconditionallySafe = KnownSafeTD && KnownSafeBU;
  if (UnconditionallySafe) {
    RetainsToMove.ReverseInsertPts.clear();
    ReleasesToMove.ReverseInsertPts.clear();
    NewCount = 0;
  } else {
    // Determine whether the new insertion points we computed preserve the
    // balance of retain and release calls through the program.
    // TODO: If the fully aggressive solution isn't valid, try to find a
    // less aggressive solution which is.
    if (NewDelta != 0)
      return false;

    // At this point, we are not going to remove any RR pairs, but we still are
    // able to move RR pairs. If one of our pointers is afflicted with
    // CFGHazards, we cannot perform such code motion so exit early.
    const bool WillPerformCodeMotion =
        !RetainsToMove.ReverseInsertPts.empty() ||
        !ReleasesToMove.ReverseInsertPts.empty();
    if (CFGHazardAfflicted && WillPerformCodeMotion)
      return false;
  }

  // Determine whether the original call points are balanced in the retain and
  // release calls through the program. If not, conservatively don't touch
  // them.
  // TODO: It's theoretically possible to do code motion in this case, as
  // long as the existing imbalances are maintained.
  if (OldDelta != 0)
    return false;

  Changed = true;
  assert(OldCount != 0 && "Unreachable code?");
  NumRRs += OldCount - NewCount;
  // Set to true if we completely removed any RR pairs.
  AnyPairsCompletelyEliminated = NewCount == 0;

  // We can move calls!
  return true;
}

/// Identify pairings between the retains and releases, and delete and/or move
/// them.
bool ObjCARCOpt::PerformCodePlacement(
    DenseMap<const BasicBlock *, BBState> &BBStates,
    BlotMapVector<Value *, RRInfo> &Retains,
    DenseMap<Value *, RRInfo> &Releases, Module *M) {
  LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n");

  bool AnyPairsCompletelyEliminated = false;
  SmallVector<Instruction *, 8> DeadInsts;

  // Visit each retain.
  for (BlotMapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
                                                      E = Retains.end();
       I != E; ++I) {
    Value *V = I->first;
    if (!V) continue; // blotted

    Instruction *Retain = cast<Instruction>(V);

    LLVM_DEBUG(dbgs() << "Visiting: " << *Retain << "\n");

    Value *Arg = GetArgRCIdentityRoot(Retain);

    // If the object being released is in static or stack storage, we know it's
    // not being managed by ObjC reference counting, so we can delete pairs
    // regardless of what possible decrements or uses lie between them.
    bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);

    // A constant pointer can't be pointing to an object on the heap. It may
    // be reference-counted, but it won't be deleted.
    if (const LoadInst *LI = dyn_cast<LoadInst>(Arg))
      if (const GlobalVariable *GV =
            dyn_cast<GlobalVariable>(
              GetRCIdentityRoot(LI->getPointerOperand())))
        if (GV->isConstant())
          KnownSafe = true;

    // Connect the dots between the top-down-collected RetainsToMove and
    // bottom-up-collected ReleasesToMove to form sets of related calls.
    RRInfo RetainsToMove, ReleasesToMove;

    bool PerformMoveCalls = PairUpRetainsAndReleases(
        BBStates, Retains, Releases, M, Retain, DeadInsts,
        RetainsToMove, ReleasesToMove, Arg, KnownSafe,
        AnyPairsCompletelyEliminated);

    if (PerformMoveCalls) {
      // Ok, everything checks out and we're all set. Let's move/delete some
      // code!
      MoveCalls(Arg, RetainsToMove, ReleasesToMove,
                Retains, Releases, DeadInsts, M);
    }
  }

  // Now that we're done moving everything, we can delete the newly dead
  // instructions, as we no longer need them as insert points.
  while (!DeadInsts.empty())
    EraseInstruction(DeadInsts.pop_back_val());

  return AnyPairsCompletelyEliminated;
}

/// Weak pointer optimizations.
void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
  LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeWeakCalls ==\n");

  // First, do memdep-style RLE and S2L optimizations. We can't use memdep
  // itself because it uses AliasAnalysis and we need to do provenance
  // queries instead.
  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
    Instruction *Inst = &*I++;

    LLVM_DEBUG(dbgs() << "Visiting: " << *Inst << "\n");

    ARCInstKind Class = GetBasicARCInstKind(Inst);
    if (Class != ARCInstKind::LoadWeak &&
        Class != ARCInstKind::LoadWeakRetained)
      continue;

    // Delete objc_loadWeak calls with no users.
    if (Class == ARCInstKind::LoadWeak && Inst->use_empty()) {
      Inst->eraseFromParent();
      Changed = true;
      continue;
    }

    // TODO: For now, just look for an earlier available version of this value
    // within the same block. Theoretically, we could do memdep-style non-local
    // analysis too, but that would want caching. A better approach would be to
    // use the technique that EarlyCSE uses.
    inst_iterator Current = std::prev(I);
    BasicBlock *CurrentBB = &*Current.getBasicBlockIterator();
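    // Scan backwards from the instruction just before Inst to the top of its
    // block, looking for an earlier weak-pointer call on the same location.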
    for (BasicBlock::iterator B = CurrentBB->begin(),
                              J = Current.getInstructionIterator();
         J != B; --J) {
      Instruction *EarlierInst = &*std::prev(J);
      ARCInstKind EarlierClass = GetARCInstKind(EarlierInst);
      switch (EarlierClass) {
      case ARCInstKind::LoadWeak:
      case ARCInstKind::LoadWeakRetained: {
        // If this is loading from the same pointer, replace this load's value
        // with that one.
        CallInst *Call = cast<CallInst>(Inst);
        CallInst *EarlierCall = cast<CallInst>(EarlierInst);
        Value *Arg = Call->getArgOperand(0);
        Value *EarlierArg = EarlierCall->getArgOperand(0);
        switch (PA.getAA()->alias(Arg, EarlierArg)) {
        case AliasResult::MustAlias:
          Changed = true;
          // If the load has a builtin retain, insert a plain retain for it.
          if (Class == ARCInstKind::LoadWeakRetained) {
            Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
            CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
            CI->setTailCall();
          }
          // Zap the fully redundant load.
          Call->replaceAllUsesWith(EarlierCall);
          Call->eraseFromParent();
          goto clobbered;
        case AliasResult::MayAlias:
        case AliasResult::PartialAlias:
          goto clobbered;
        case AliasResult::NoAlias:
          break;
        }
        break;
      }
      case ARCInstKind::StoreWeak:
      case ARCInstKind::InitWeak: {
        // If this is storing to the same pointer and has the same size etc.,
        // replace this load's value with the stored value.
        CallInst *Call = cast<CallInst>(Inst);
        CallInst *EarlierCall = cast<CallInst>(EarlierInst);
        Value *Arg = Call->getArgOperand(0);
        Value *EarlierArg = EarlierCall->getArgOperand(0);
        switch (PA.getAA()->alias(Arg, EarlierArg)) {
        case AliasResult::MustAlias:
          Changed = true;
          // If the load has a builtin retain, insert a plain retain for it.
          if (Class == ARCInstKind::LoadWeakRetained) {
            Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
            CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
            CI->setTailCall();
          }
          // Zap the fully redundant load.
          Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
          Call->eraseFromParent();
          goto clobbered;
        case AliasResult::MayAlias:
        case AliasResult::PartialAlias:
          goto clobbered;
        case AliasResult::NoAlias:
          break;
        }
        break;
      }
      case ARCInstKind::MoveWeak:
      case ARCInstKind::CopyWeak:
        // TODO: Grab the copied value.
        goto clobbered;
      case ARCInstKind::AutoreleasepoolPush:
      case ARCInstKind::None:
      case ARCInstKind::IntrinsicUser:
      case ARCInstKind::User:
        // Weak pointers are only modified through the weak entry points
        // (and arbitrary calls, which could call the weak entry points).
        break;
      default:
        // Anything else could modify the weak pointer.
        goto clobbered;
      }
    }
  clobbered:;
  }

  // Then, for each destroyWeak with an alloca operand, check to see if
  // the alloca and all its users can be zapped.
  for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) {
    ARCInstKind Class = GetBasicARCInstKind(&Inst);
    if (Class != ARCInstKind::DestroyWeak)
      continue;

    CallInst *Call = cast<CallInst>(&Inst);
    Value *Arg = Call->getArgOperand(0);
    if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
      for (User *U : Alloca->users()) {
        const Instruction *UserInst = cast<Instruction>(U);
        switch (GetBasicARCInstKind(UserInst)) {
        case ARCInstKind::InitWeak:
        case ARCInstKind::StoreWeak:
        case ARCInstKind::DestroyWeak:
          continue;
        default:
          goto done;
        }
      }
      Changed = true;
      for (User *U : llvm::make_early_inc_range(Alloca->users())) {
        CallInst *UserInst = cast<CallInst>(U);
        switch (GetBasicARCInstKind(UserInst)) {
        case ARCInstKind::InitWeak:
        case ARCInstKind::StoreWeak:
          // These functions return their second argument.
          UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
          break;
        case ARCInstKind::DestroyWeak:
          // No return value.
          break;
        default:
          llvm_unreachable("alloca really is used!");
        }
        UserInst->eraseFromParent();
      }
      Alloca->eraseFromParent();
    done:;
    }
  }
}

/// Identify program paths which execute sequences of retains and releases
/// that can be eliminated.
bool ObjCARCOpt::OptimizeSequences(Function &F) {
  // Releases, Retains - These are used to store the results of the main flow
  // analysis. These use Value* as the key instead of Instruction* so that the
  // map stays valid when we get around to rewriting code and calls get
  // replaced by arguments.
  DenseMap<Value *, RRInfo> Releases;
  BlotMapVector<Value *, RRInfo> Retains;

  // This is used during the traversal of the function to track the
  // states for each identified object at each block.
  DenseMap<const BasicBlock *, BBState> BBStates;

  // Analyze the CFG of the function, and all instructions.
  bool NestingDetected = Visit(F, BBStates, Retains, Releases);

  if (DisableRetainReleasePairing)
    return false;

  // Transform.
  bool AnyPairsCompletelyEliminated = PerformCodePlacement(BBStates, Retains,
                                                           Releases,
                                                           F.getParent());

  return AnyPairsCompletelyEliminated && NestingDetected;
}

/// Check if there is a dependent call earlier that does not have anything in
/// between the Retain and the call that can affect the reference count of their
/// shared pointer argument. Note that Retain need not be in BB.
static CallInst *HasSafePathToPredecessorCall(const Value *Arg,
                                              Instruction *Retain,
                                              ProvenanceAnalysis &PA) {
  auto *Call = dyn_cast_or_null<CallInst>(findSingleDependency(
      CanChangeRetainCount, Arg, Retain->getParent(), Retain, PA));

  // Check that the pointer is the return value of the call.
  if (!Call || Arg != Call)
    return nullptr;

  // Check that the call is a regular call.
  ARCInstKind Class = GetBasicARCInstKind(Call);
  return Class == ARCInstKind::CallOrUser || Class == ARCInstKind::Call
             ? Call
             : nullptr;
}

/// Find a dependent retain that precedes the given autorelease for which there
/// is nothing in between the two instructions that can affect the ref count of
/// Arg.
static CallInst *
FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
                                  Instruction *Autorelease,
                                  ProvenanceAnalysis &PA) {
  auto *Retain = dyn_cast_or_null<CallInst>(
      findSingleDependency(CanChangeRetainCount, Arg, BB, Autorelease, PA));

  // Check that we found a retain with the same argument.
  if (!Retain || !IsRetain(GetBasicARCInstKind(Retain)) ||
      GetArgRCIdentityRoot(Retain) != Arg) {
    return nullptr;
  }

  return Retain;
}

/// Look for an ``autorelease'' instruction dependent on Arg such that there are
/// no instructions dependent on Arg that need a positive ref count in between
/// the autorelease and the ret.
static CallInst *
FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
                                       ReturnInst *Ret,
                                       ProvenanceAnalysis &PA) {
  SmallPtrSet<Instruction *, 4> DepInsts;
  auto *Autorelease = dyn_cast_or_null<CallInst>(
      findSingleDependency(NeedsPositiveRetainCount, Arg, BB, Ret, PA));

  if (!Autorelease)
    return nullptr;
  ARCInstKind AutoreleaseClass = GetBasicARCInstKind(Autorelease);
  if (!IsAutorelease(AutoreleaseClass))
    return nullptr;
  if (GetArgRCIdentityRoot(Autorelease) != Arg)
    return nullptr;

  return Autorelease;
}

/// Look for this pattern:
/// \code
///    %call = call i8* @something(...)
///    %2 = call i8* @objc_retain(i8* %call)
///    %3 = call i8* @objc_autorelease(i8* %2)
///    ret i8* %3
/// \endcode
/// And delete the retain and autorelease.
void ObjCARCOpt::OptimizeReturns(Function &F) {
  if (!F.getReturnType()->isPointerTy())
    return;

  LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeReturns ==\n");

  for (BasicBlock &BB: F) {
    ReturnInst *Ret = dyn_cast<ReturnInst>(&BB.back());
    if (!Ret)
      continue;

    LLVM_DEBUG(dbgs() << "Visiting: " << *Ret << "\n");

    const Value *Arg = GetRCIdentityRoot(Ret->getOperand(0));

    // Look for an ``autorelease'' instruction that is a predecessor of Ret and
    // dependent on Arg such that there are no instructions dependent on Arg
    // that need a positive ref count in between the autorelease and Ret.
    CallInst *Autorelease =
        FindPredecessorAutoreleaseWithSafePath(Arg, &BB, Ret, PA);

    if (!Autorelease)
      continue;

    CallInst *Retain = FindPredecessorRetainWithSafePath(
        Arg, Autorelease->getParent(), Autorelease, PA);

    if (!Retain)
      continue;

    // Check that there is nothing that can affect the reference count
    // between the retain and the call.  Note that Retain need not be in BB.
    CallInst *Call = HasSafePathToPredecessorCall(Arg, Retain, PA);

    // Don't remove retainRV/autoreleaseRV pairs if the call isn't a tail call.
    if (!Call ||
        (!Call->isTailCall() &&
         GetBasicARCInstKind(Retain) == ARCInstKind::RetainRV &&
         GetBasicARCInstKind(Autorelease) == ARCInstKind::AutoreleaseRV))
      continue;

    // If so, we can zap the retain and autorelease.
    Changed = true;
    ++NumRets;
    LLVM_DEBUG(dbgs() << "Erasing: " << *Retain << "\nErasing: " << *Autorelease
                      << "\n");
    BundledInsts->eraseInst(Retain);
    EraseInstruction(Autorelease);
  }
}

#ifndef NDEBUG
void
ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) {
  Statistic &NumRetains =
      AfterOptimization ? NumRetainsAfterOpt : NumRetainsBeforeOpt;
  Statistic &NumReleases =
      AfterOptimization ? NumReleasesAfterOpt : NumReleasesBeforeOpt;

  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
    Instruction *Inst = &*I++;
    switch (GetBasicARCInstKind(Inst)) {
    default:
      break;
    case ARCInstKind::Retain:
      ++NumRetains;
      break;
    case ARCInstKind::Release:
      ++NumReleases;
      break;
    }
  }
}
#endif

void ObjCARCOpt::init(Function &F) {
  if (!EnableARCOpts)
    return;

  // Intuitively, objc_retain and others are nocapture; however, in practice
  // they are not, because they return their argument value. And objc_release
  // calls finalizers which can have arbitrary side effects.
  MDKindCache.init(F.getParent());

  // Initialize our runtime entry point cache.
  EP.init(F.getParent());

  // Compute which blocks are in which funclet.
  if (F.hasPersonalityFn() &&
      isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
    BlockEHColors = colorEHFunclets(F);
}

bool ObjCARCOpt::run(Function &F, AAResults &AA) {
  if (!EnableARCOpts)
    return false;

  Changed = CFGChanged = false;
  BundledRetainClaimRVs BRV(/*ContractPass=*/false);
  BundledInsts = &BRV;

  LLVM_DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName()
                    << " >>>"
                       "\n");

  std::pair<bool, bool> R = BundledInsts->insertAfterInvokes(F, nullptr);
  Changed |= R.first;
  CFGChanged |= R.second;

  PA.setAA(&AA);

#ifndef NDEBUG
  if (AreStatisticsEnabled()) {
    GatherStatistics(F, false);
  }
#endif

  // This pass performs several distinct transformations. As a compile-time aid
  // when compiling code that isn't ObjC, skip these if the relevant ObjC
  // library functions aren't declared.

  // Preliminary optimizations. This also computes UsedInThisFunction.
  OptimizeIndividualCalls(F);

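  // UsedInThisFunction has one bit set per ARCInstKind seen in the function;
  // each phase below runs only if the instruction kinds it handles appear.
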
  // Optimizations for weak pointers.
  if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::LoadWeak)) |
                            (1 << unsigned(ARCInstKind::LoadWeakRetained)) |
                            (1 << unsigned(ARCInstKind::StoreWeak)) |
                            (1 << unsigned(ARCInstKind::InitWeak)) |
                            (1 << unsigned(ARCInstKind::CopyWeak)) |
                            (1 << unsigned(ARCInstKind::MoveWeak)) |
                            (1 << unsigned(ARCInstKind::DestroyWeak))))
    OptimizeWeakCalls(F);

  // Optimizations for retain+release pairs.
  if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Retain)) |
                            (1 << unsigned(ARCInstKind::RetainRV)) |
                            (1 << unsigned(ARCInstKind::RetainBlock))))
    if (UsedInThisFunction & (1 << unsigned(ARCInstKind::Release)))
      // Run OptimizeSequences until it either stops making changes or
      // no retain+release pair nesting is detected.
      while (OptimizeSequences(F)) {}

  // Optimizations if objc_autorelease is used.
  if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Autorelease)) |
                            (1 << unsigned(ARCInstKind::AutoreleaseRV))))
    OptimizeReturns(F);

  // Gather statistics after optimization.
#ifndef NDEBUG
  if (AreStatisticsEnabled()) {
    GatherStatistics(F, true);
  }
#endif

  LLVM_DEBUG(dbgs() << "\n");

  return Changed;
}

/// @}
///

PreservedAnalyses ObjCARCOptPass::run(Function &F,
                                      FunctionAnalysisManager &AM) {
  ObjCARCOpt OCAO;
  OCAO.init(F);

  bool Changed = OCAO.run(F, AM.getResult<AAManager>(F));
  bool CFGChanged = OCAO.hasCFGChanged();
  if (Changed) {
    PreservedAnalyses PA;
    if (!CFGChanged)
      PA.preserveSet<CFGAnalyses>();
    return PA;
  }
  return PreservedAnalyses::all();
}