//===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the transformation that optimizes memory intrinsics
// such as memcpy using the size value profile. When memory intrinsic size
// value profile metadata is available, a single memory intrinsic is expanded
// to a sequence of guarded specialized versions that are called with the
// hottest size(s), for later expansion into more optimal inline sequences.
//
//===----------------------------------------------------------------------===//
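//
// For illustration only (hypothetical IR; the exact shape depends on the
// profile and on the IR version): a call such as
//
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i1 false)
//
// whose size value profile shows that %n is most often 32 may be rewritten
// into a switch on %n with a case block that calls the intrinsic with the
// constant size 32 and a default block that keeps the original variable-size
// call (see the expansion sketch in MemOPSizeOpt::perform below).
//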

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <cassert>
#include <cstdint>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "pgo-memop-opt"

STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized.");
STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");

// The minimum call count to optimize memory intrinsic calls.
static cl::opt<unsigned>
    MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore,
                        cl::init(1000),
                        cl::desc("The minimum count to optimize memory "
                                 "intrinsic calls"));

// Command line option to disable memory intrinsic optimization. The default is
// false. This is for debugging purposes.
static cl::opt<bool>
    DisableMemOPOPT("disable-memop-opt", cl::init(false), cl::Hidden,
                    cl::desc("Disable the memory intrinsic optimization"));

// The percent threshold to optimize memory intrinsic calls.
static cl::opt<unsigned>
    MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40),
                          cl::Hidden, cl::ZeroOrMore,
                          cl::desc("The percentage threshold for the "
                                   "memory intrinsic calls optimization"));

// Maximum number of versions for optimizing memory intrinsic calls.
static cl::opt<unsigned>
    MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden,
                    cl::ZeroOrMore,
                    cl::desc("The max version for the optimized memory "
                             "intrinsic calls"));

// Scale the counts from the annotation using the BB count value.
static cl::opt<bool>
    MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden,
                    cl::desc("Scale the memop size counts using the basic "
                             "block count value"));

// This option sets the range of precise profile memop sizes.
extern cl::opt<std::string> MemOPSizeRange;

// This option sets the value that groups large memop sizes.
extern cl::opt<unsigned> MemOPSizeLarge;

cl::opt<bool>
    MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true),
                       cl::Hidden,
                       cl::desc("Size-specialize memcmp and bcmp calls"));
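
// As a usage sketch (the flag values below are hypothetical; the flags
// themselves are defined above), the optimization can be tuned when the pass
// is run on value-profile-annotated IR, e.g.:
//
//   opt -pgo-memop-opt -pgo-memop-count-threshold=500 \
//       -pgo-memop-percent-threshold=30 -pgo-memop-max-version=4 -S in.ll
//
// -disable-memop-opt turns the transformation off entirely, and
// -pgo-memop-optimize-memcmp-bcmp=false restricts it to memory intrinsics.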

namespace {
class PGOMemOPSizeOptLegacyPass : public FunctionPass {
public:
  static char ID;

  PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) {
    initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override { return "PGOMemOPSize"; }

private:
  bool runOnFunction(Function &F) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<BlockFrequencyInfoWrapperPass>();
    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }
};
} // end anonymous namespace

char PGOMemOPSizeOptLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
                      "Optimize memory intrinsic using its size value profile",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
                    "Optimize memory intrinsic using its size value profile",
                    false, false)

FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() {
  return new PGOMemOPSizeOptLegacyPass();
}

namespace {

static const char *getMIName(const MemIntrinsic *MI) {
  switch (MI->getIntrinsicID()) {
  case Intrinsic::memcpy:
    return "memcpy";
  case Intrinsic::memmove:
    return "memmove";
  case Intrinsic::memset:
    return "memset";
  default:
    return "unknown";
  }
}

// A class that abstracts a memop (memcpy, memmove, memset, memcmp and bcmp).
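// For the memcmp and bcmp library calls handled here, the size is the third
// argument (e.g. memcmp(const void *s1, const void *s2, size_t n)), which is
// why the CallInst paths below use argument index 2 for the length.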
struct MemOp {
  Instruction *I;
  MemOp(MemIntrinsic *MI) : I(MI) {}
  MemOp(CallInst *CI) : I(CI) {}
  MemIntrinsic *asMI() { return dyn_cast<MemIntrinsic>(I); }
  CallInst *asCI() { return cast<CallInst>(I); }
  MemOp clone() {
    if (auto MI = asMI())
      return MemOp(cast<MemIntrinsic>(MI->clone()));
    return MemOp(cast<CallInst>(asCI()->clone()));
  }
  Value *getLength() {
    if (auto MI = asMI())
      return MI->getLength();
    return asCI()->getArgOperand(2);
  }
  void setLength(Value *Length) {
    if (auto MI = asMI())
      return MI->setLength(Length);
    asCI()->setArgOperand(2, Length);
  }
  StringRef getFuncName() {
    if (auto MI = asMI())
      return MI->getCalledFunction()->getName();
    return asCI()->getCalledFunction()->getName();
  }
  bool isMemmove() {
    if (auto MI = asMI())
      if (MI->getIntrinsicID() == Intrinsic::memmove)
        return true;
    return false;
  }
  bool isMemcmp(TargetLibraryInfo &TLI) {
    LibFunc Func;
    if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) &&
        Func == LibFunc_memcmp) {
      return true;
    }
    return false;
  }
  bool isBcmp(TargetLibraryInfo &TLI) {
    LibFunc Func;
    if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) &&
        Func == LibFunc_bcmp) {
      return true;
    }
    return false;
  }
  const char *getName(TargetLibraryInfo &TLI) {
    if (auto MI = asMI())
      return getMIName(MI);
    LibFunc Func;
    if (TLI.getLibFunc(*asCI(), Func)) {
      if (Func == LibFunc_memcmp)
        return "memcmp";
      if (Func == LibFunc_bcmp)
        return "bcmp";
    }
    llvm_unreachable("Must be MemIntrinsic or memcmp/bcmp CallInst");
    return nullptr;
  }
};

class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
public:
  MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI,
               OptimizationRemarkEmitter &ORE, DominatorTree *DT,
               TargetLibraryInfo &TLI)
      : Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(false) {
    ValueDataArray =
        std::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
    // Get the MemOPSize range information from the MemOPSizeRange option.
    getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart,
                                PreciseRangeLast);
  }
  bool isChanged() const { return Changed; }
  void perform() {
    WorkList.clear();
    visit(Func);

    for (auto &MO : WorkList) {
      ++NumOfPGOMemOPAnnotate;
      if (perform(MO)) {
        Changed = true;
        ++NumOfPGOMemOPOpt;
        LLVM_DEBUG(dbgs() << "MemOP call: " << MO.getFuncName()
                          << " is transformed.\n");
      }
    }
  }

  void visitMemIntrinsic(MemIntrinsic &MI) {
    Value *Length = MI.getLength();
    // Do not perform the transformation on constant-length calls.
    if (isa<ConstantInt>(Length))
      return;
    WorkList.push_back(MemOp(&MI));
  }

  void visitCallInst(CallInst &CI) {
    LibFunc Func;
    if (TLI.getLibFunc(CI, Func) &&
        (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
        !isa<ConstantInt>(CI.getArgOperand(2))) {
      WorkList.push_back(MemOp(&CI));
    }
  }

private:
  Function &Func;
  BlockFrequencyInfo &BFI;
  OptimizationRemarkEmitter &ORE;
  DominatorTree *DT;
  TargetLibraryInfo &TLI;
  bool Changed;
  std::vector<MemOp> WorkList;
  // Start of the precise range.
  int64_t PreciseRangeStart;
  // Last value of the precise range.
  int64_t PreciseRangeLast;
  // The buffer used to read the profile annotation.
  std::unique_ptr<InstrProfValueData[]> ValueDataArray;
  bool perform(MemOp MO);

  // This kind shows which group the value falls in. For PreciseValue, we have
  // the profile count for that value. LargeGroup groups the values that are in
  // range [LargeValue, +inf). NonLargeGroup groups the rest of values.
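  // As a purely illustrative example (the actual boundaries come from the
  // MemOPSizeRange and MemOPSizeLarge options used at instrumentation time):
  // with a precise range of [0, 8] and a large-size threshold of 8192, a
  // profiled size of 5 is a PreciseValue, the bucket value 9
  // (PreciseRangeLast + 1) is NonLargeGroup, and the bucket value 8192 is
  // LargeGroup.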
  enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup };

  MemOPSizeKind getMemOPSizeKind(int64_t Value) const {
    if (Value == MemOPSizeLarge && MemOPSizeLarge != 0)
      return LargeGroup;
    if (Value == PreciseRangeLast + 1)
      return NonLargeGroup;
    return PreciseValue;
  }
};

static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
  assert(Count <= TotalCount);
  if (Count < MemOPCountThreshold)
    return false;
  if (Count < TotalCount * MemOPPercentThreshold / 100)
    return false;
  return true;
}
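// Worked example with the default thresholds (count 1000, percent 40): a size
// whose scaled count is 1200 out of a remaining total of 2500 is profitable,
// since 1200 >= 1000 and 1200 >= 2500 * 40 / 100 = 1000; the same count out of
// a remaining total of 4000 is not, since 1200 < 1600.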

static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num,
                                      uint64_t Denom) {
  if (!MemOPScaleCount)
    return Count;
  bool Overflowed;
  uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed);
  return ScaleCount / Denom;
}
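// For example, if the value profile recorded a count of 300 for some size out
// of a profiled total of 1000 (Denom), but the enclosing basic block's profile
// count is 2000 (Num), the size count is scaled to 300 * 2000 / 1000 = 600,
// with SaturatingMultiply guarding against overflow of the intermediate
// product.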

bool MemOPSizeOpt::perform(MemOp MO) {
  assert(MO.I);
  if (MO.isMemmove())
    return false;
  if (!MemOPOptMemcmpBcmp && (MO.isMemcmp(TLI) || MO.isBcmp(TLI)))
    return false;

  uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2;
  uint64_t TotalCount;
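  // The size profile is read from the instruction's !prof value-profile
  // annotation. As a rough sketch (the exact encoding is owned by the
  // InstrProf support code), the metadata looks like
  //   !{!"VP", i32 Kind, i64 TotalCount, i64 Size1, i64 Count1, ...}
  // with the (size, count) pairs sorted by decreasing count.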
  if (!getValueProfDataFromInst(*MO.I, IPVK_MemOPSize, MaxNumPromotions,
                                ValueDataArray.get(), NumVals, TotalCount))
    return false;

  uint64_t ActualCount = TotalCount;
  uint64_t SavedTotalCount = TotalCount;
  if (MemOPScaleCount) {
    auto BBEdgeCount = BFI.getBlockProfileCount(MO.I->getParent());
    if (!BBEdgeCount)
      return false;
    ActualCount = *BBEdgeCount;
  }
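  // When scaling is enabled, ActualCount (the basic block's profile count)
  // becomes the effective total used for the profitability checks below,
  // while SavedTotalCount keeps the original profiled total so it can serve
  // as the scaling denominator and be re-attached to the leftover records.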

  ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
  LLVM_DEBUG(dbgs() << "Read one memory intrinsic profile with count "
                    << ActualCount << "\n");
  LLVM_DEBUG(
      for (auto &VD
           : VDs) { dbgs() << "  (" << VD.Value << "," << VD.Count << ")\n"; });

  if (ActualCount < MemOPCountThreshold)
    return false;
  // Skip if the total value profiled count is 0, in which case we can't
  // scale up the counts properly (and there is no profitable transformation).
  if (TotalCount == 0)
    return false;

  TotalCount = ActualCount;
  if (MemOPScaleCount)
    LLVM_DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount
                      << " denominator = " << SavedTotalCount << "\n");

  // Keep track of the count of the default case.
  uint64_t RemainCount = TotalCount;
  uint64_t SavedRemainCount = SavedTotalCount;
  SmallVector<uint64_t, 16> SizeIds;
  SmallVector<uint64_t, 16> CaseCounts;
  uint64_t MaxCount = 0;
  unsigned Version = 0;
  // Default case is in the front -- save the slot here.
  CaseCounts.push_back(0);
  for (auto &VD : VDs) {
    int64_t V = VD.Value;
    uint64_t C = VD.Count;
    if (MemOPScaleCount)
      C = getScaledCount(C, ActualCount, SavedTotalCount);

    // Only care about precise values here.
    if (getMemOPSizeKind(V) != PreciseValue)
      continue;

    // The value data is sorted by count. Stop at the first unprofitable value.
    if (!isProfitable(C, RemainCount))
      break;

    SizeIds.push_back(V);
    CaseCounts.push_back(C);
    if (C > MaxCount)
      MaxCount = C;

    assert(RemainCount >= C);
    RemainCount -= C;
    assert(SavedRemainCount >= VD.Count);
    SavedRemainCount -= VD.Count;

    if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0)
      break;
  }

  if (Version == 0)
    return false;

  CaseCounts[0] = RemainCount;
  if (RemainCount > MaxCount)
    MaxCount = RemainCount;

  uint64_t SumForOpt = TotalCount - RemainCount;
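  // SumForOpt is the portion of the (possibly scaled) total count that is
  // covered by the specialized versions; it is only used for the debug output
  // and the optimization remark emitted below.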

  LLVM_DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
                    << " Versions (covering " << SumForOpt << " out of "
                    << TotalCount << ")\n");

  // mem_op(..., size)
  // ==>
  // switch (size) {
  //   case s1:
  //      mem_op(..., s1);
  //      goto merge_bb;
  //   case s2:
  //      mem_op(..., s2);
  //      goto merge_bb;
  //   ...
  //   default:
  //      mem_op(..., size);
  //      goto merge_bb;
  // }
  // merge_bb:

  BasicBlock *BB = MO.I->getParent();
  LLVM_DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
  LLVM_DEBUG(dbgs() << *BB << "\n");
  auto OrigBBFreq = BFI.getBlockFreq(BB);

  BasicBlock *DefaultBB = SplitBlock(BB, MO.I, DT);
  BasicBlock::iterator It(*MO.I);
  ++It;
  assert(It != DefaultBB->end());
  BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT);
  MergeBB->setName("MemOP.Merge");
  BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
  DefaultBB->setName("MemOP.Default");
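  // After the two splits, BB will end with the switch created below,
  // DefaultBB contains the original variable-size memop call, and MergeBB
  // holds the rest of the original block plus the optional result phi.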

  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
  auto &Ctx = Func.getContext();
  IRBuilder<> IRB(BB);
  BB->getTerminator()->eraseFromParent();
  Value *SizeVar = MO.getLength();
  SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size());
  Type *MemOpTy = MO.I->getType();
  PHINode *PHI = nullptr;
  if (!MemOpTy->isVoidTy()) {
    // Insert a phi for the return values at the merge block.
    IRBuilder<> IRBM(MergeBB->getFirstNonPHI());
    PHI = IRBM.CreatePHI(MemOpTy, SizeIds.size() + 1, "MemOP.RVMerge");
    MO.I->replaceAllUsesWith(PHI);
    PHI->addIncoming(MO.I, DefaultBB);
  }

  // Clear the value profile data.
  MO.I->setMetadata(LLVMContext::MD_prof, nullptr);
  // If everything was promoted, the !prof metadata is no longer needed.
  if (SavedRemainCount > 0 || Version != NumVals)
    // Otherwise, re-annotate the call with the un-promoted records.
    annotateValueSite(*Func.getParent(), *MO.I, VDs.slice(Version),
                      SavedRemainCount, IPVK_MemOPSize, NumVals);

  LLVM_DEBUG(dbgs() << "\n\n== Basic Block After ==\n");

  std::vector<DominatorTree::UpdateType> Updates;
  if (DT)
    Updates.reserve(2 * SizeIds.size());

  for (uint64_t SizeId : SizeIds) {
    BasicBlock *CaseBB = BasicBlock::Create(
        Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
    MemOp NewMO = MO.clone();
    // Fix the size argument to the constant for this case.
    auto *SizeType = dyn_cast<IntegerType>(NewMO.getLength()->getType());
    assert(SizeType && "Expected integer type size argument.");
    ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId);
    NewMO.setLength(CaseSizeId);
    CaseBB->getInstList().push_back(NewMO.I);
    IRBuilder<> IRBCase(CaseBB);
    IRBCase.CreateBr(MergeBB);
    SI->addCase(CaseSizeId, CaseBB);
    if (!MemOpTy->isVoidTy())
      PHI->addIncoming(NewMO.I, CaseBB);
    if (DT) {
      Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB});
      Updates.push_back({DominatorTree::Insert, BB, CaseBB});
    }
    LLVM_DEBUG(dbgs() << *CaseBB << "\n");
  }
  DTU.applyUpdates(Updates);
  Updates.clear();

  setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);

  LLVM_DEBUG(dbgs() << *BB << "\n");
  LLVM_DEBUG(dbgs() << *DefaultBB << "\n");
  LLVM_DEBUG(dbgs() << *MergeBB << "\n");

  ORE.emit([&]() {
    using namespace ore;
    return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MO.I)
           << "optimized " << NV("Memop", MO.getName(TLI)) << " with count "
           << NV("Count", SumForOpt) << " out of " << NV("Total", TotalCount)
           << " for " << NV("Versions", Version) << " versions";
  });

  return true;
}
} // namespace

static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI,
                                OptimizationRemarkEmitter &ORE,
                                DominatorTree *DT, TargetLibraryInfo &TLI) {
  if (DisableMemOPOPT)
    return false;

  if (F.hasFnAttribute(Attribute::OptimizeForSize))
    return false;
  MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT, TLI);
  MemOPSizeOpt.perform();
  return MemOPSizeOpt.isChanged();
}

bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) {
  BlockFrequencyInfo &BFI =
      getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
  TargetLibraryInfo &TLI =
      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  return PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI);
}

namespace llvm {
char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID;

PreservedAnalyses PGOMemOPSizeOpt::run(Function &F,
                                       FunctionAnalysisManager &FAM) {
  auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
  auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
  bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI);
  if (!Changed)
    return PreservedAnalyses::all();
  auto PA = PreservedAnalyses();
  PA.preserve<GlobalsAA>();
  PA.preserve<DominatorTreeAnalysis>();
  return PA;
}
} // namespace llvm