1 //===- Construction of pass pipelines -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file provides the implementation of the PassBuilder based on our
11 /// static pass registry as well as related functionality. It also provides
12 /// helpers to aid in analyzing, debugging, and testing passes and pass
13 /// pipelines.
14 ///
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/AliasAnalysis.h"
19 #include "llvm/Analysis/BasicAliasAnalysis.h"
20 #include "llvm/Analysis/CGSCCPassManager.h"
21 #include "llvm/Analysis/GlobalsModRef.h"
22 #include "llvm/Analysis/InlineAdvisor.h"
23 #include "llvm/Analysis/ProfileSummaryInfo.h"
24 #include "llvm/Analysis/ScopedNoAliasAA.h"
25 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
26 #include "llvm/IR/PassManager.h"
27 #include "llvm/Passes/OptimizationLevel.h"
28 #include "llvm/Passes/PassBuilder.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/PGOOptions.h"
32 #include "llvm/Support/VirtualFileSystem.h"
33 #include "llvm/Target/TargetMachine.h"
34 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
35 #include "llvm/Transforms/Coroutines/CoroCleanup.h"
36 #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
37 #include "llvm/Transforms/Coroutines/CoroEarly.h"
38 #include "llvm/Transforms/Coroutines/CoroElide.h"
39 #include "llvm/Transforms/Coroutines/CoroSplit.h"
40 #include "llvm/Transforms/HipStdPar/HipStdPar.h"
41 #include "llvm/Transforms/IPO/AlwaysInliner.h"
42 #include "llvm/Transforms/IPO/Annotation2Metadata.h"
43 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
44 #include "llvm/Transforms/IPO/Attributor.h"
45 #include "llvm/Transforms/IPO/CalledValuePropagation.h"
46 #include "llvm/Transforms/IPO/ConstantMerge.h"
47 #include "llvm/Transforms/IPO/CrossDSOCFI.h"
48 #include "llvm/Transforms/IPO/DeadArgumentElimination.h"
49 #include "llvm/Transforms/IPO/ElimAvailExtern.h"
50 #include "llvm/Transforms/IPO/EmbedBitcodePass.h"
51 #include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
52 #include "llvm/Transforms/IPO/FunctionAttrs.h"
53 #include "llvm/Transforms/IPO/GlobalDCE.h"
54 #include "llvm/Transforms/IPO/GlobalOpt.h"
55 #include "llvm/Transforms/IPO/GlobalSplit.h"
56 #include "llvm/Transforms/IPO/HotColdSplitting.h"
57 #include "llvm/Transforms/IPO/IROutliner.h"
58 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
59 #include "llvm/Transforms/IPO/Inliner.h"
60 #include "llvm/Transforms/IPO/LowerTypeTests.h"
61 #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
62 #include "llvm/Transforms/IPO/MergeFunctions.h"
63 #include "llvm/Transforms/IPO/ModuleInliner.h"
64 #include "llvm/Transforms/IPO/OpenMPOpt.h"
65 #include "llvm/Transforms/IPO/PartialInlining.h"
66 #include "llvm/Transforms/IPO/SCCP.h"
67 #include "llvm/Transforms/IPO/SampleProfile.h"
68 #include "llvm/Transforms/IPO/SampleProfileProbe.h"
69 #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
70 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
71 #include "llvm/Transforms/InstCombine/InstCombine.h"
72 #include "llvm/Transforms/Instrumentation/CGProfile.h"
73 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
74 #include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
75 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
76 #include "llvm/Transforms/Instrumentation/MemProfiler.h"
77 #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
78 #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
79 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
80 #include "llvm/Transforms/Scalar/ADCE.h"
81 #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
82 #include "llvm/Transforms/Scalar/AnnotationRemarks.h"
83 #include "llvm/Transforms/Scalar/BDCE.h"
84 #include "llvm/Transforms/Scalar/CallSiteSplitting.h"
85 #include "llvm/Transforms/Scalar/ConstraintElimination.h"
86 #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
87 #include "llvm/Transforms/Scalar/DFAJumpThreading.h"
88 #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
89 #include "llvm/Transforms/Scalar/DivRemPairs.h"
90 #include "llvm/Transforms/Scalar/EarlyCSE.h"
91 #include "llvm/Transforms/Scalar/Float2Int.h"
92 #include "llvm/Transforms/Scalar/GVN.h"
93 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
94 #include "llvm/Transforms/Scalar/InferAlignment.h"
95 #include "llvm/Transforms/Scalar/InstSimplifyPass.h"
96 #include "llvm/Transforms/Scalar/JumpTableToSwitch.h"
97 #include "llvm/Transforms/Scalar/JumpThreading.h"
98 #include "llvm/Transforms/Scalar/LICM.h"
99 #include "llvm/Transforms/Scalar/LoopDeletion.h"
100 #include "llvm/Transforms/Scalar/LoopDistribute.h"
101 #include "llvm/Transforms/Scalar/LoopFlatten.h"
102 #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
103 #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
104 #include "llvm/Transforms/Scalar/LoopInterchange.h"
105 #include "llvm/Transforms/Scalar/LoopLoadElimination.h"
106 #include "llvm/Transforms/Scalar/LoopPassManager.h"
107 #include "llvm/Transforms/Scalar/LoopRotation.h"
108 #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
109 #include "llvm/Transforms/Scalar/LoopSink.h"
110 #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
111 #include "llvm/Transforms/Scalar/LoopUnrollPass.h"
112 #include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
113 #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
114 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
115 #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
116 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
117 #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
118 #include "llvm/Transforms/Scalar/NewGVN.h"
119 #include "llvm/Transforms/Scalar/Reassociate.h"
120 #include "llvm/Transforms/Scalar/SCCP.h"
121 #include "llvm/Transforms/Scalar/SROA.h"
122 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
123 #include "llvm/Transforms/Scalar/SimplifyCFG.h"
124 #include "llvm/Transforms/Scalar/SpeculativeExecution.h"
125 #include "llvm/Transforms/Scalar/TailRecursionElimination.h"
126 #include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
127 #include "llvm/Transforms/Utils/AddDiscriminators.h"
128 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
129 #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
130 #include "llvm/Transforms/Utils/CountVisits.h"
131 #include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
132 #include "llvm/Transforms/Utils/InjectTLIMappings.h"
133 #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
134 #include "llvm/Transforms/Utils/Mem2Reg.h"
135 #include "llvm/Transforms/Utils/MoveAutoInit.h"
136 #include "llvm/Transforms/Utils/NameAnonGlobals.h"
137 #include "llvm/Transforms/Utils/RelLookupTableConverter.h"
138 #include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
139 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
140 #include "llvm/Transforms/Vectorize/SLPVectorizer.h"
141 #include "llvm/Transforms/Vectorize/VectorCombine.h"
142 
143 using namespace llvm;
144 
/// Command-line selector for the inline advisor mode, spelled
/// -enable-ml-inliner={default|development|release}. The option is hidden and
/// starts out as InliningAdvisorMode::Default.
static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
    "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
    cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
    cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
                          "Heuristics-based inliner version"),
               clEnumValN(InliningAdvisorMode::Development, "development",
                          "Use development mode (runtime-loadable model)"),
               clEnumValN(InliningAdvisorMode::Release, "release",
                          "Use release mode (AOT-compiled model)")));
154 
/// -enable-npm-synthetic-counts (hidden): requests the synthetic function
/// entry count generation pass. No explicit cl::init is given.
static cl::opt<bool> EnableSyntheticCounts(
    "enable-npm-synthetic-counts", cl::Hidden,
    cl::desc("Run synthetic function entry count generation "
             "pass"));

/// Flag to enable inline deferral during PGO.
/// Spelled -enable-npm-pgo-inline-deferral; hidden, initialized to true.
static cl::opt<bool>
    EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
                            cl::Hidden,
                            cl::desc("Enable inline deferral during PGO"));

/// -enable-module-inliner (hidden): initialized to false.
static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
                                         cl::init(false), cl::Hidden,
                                         cl::desc("Enable module inliner"));

/// -mandatory-inlining-first (hidden): initialized to false.
static cl::opt<bool> PerformMandatoryInliningsFirst(
    "mandatory-inlining-first", cl::init(false), cl::Hidden,
    cl::desc("Perform mandatory inlinings module-wide, before performing "
             "inlining"));

/// -eagerly-invalidate-analyses (hidden): initialized to true. Its value seeds
/// PipelineTuningOptions::EagerlyInvalidateAnalyses in the
/// PipelineTuningOptions constructor below.
static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
    "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
    cl::desc("Eagerly invalidate more analyses in default pipelines"));

/// -enable-merge-functions (hidden): initialized to false. Its value seeds
/// PipelineTuningOptions::MergeFunctions in the PipelineTuningOptions
/// constructor below.
static cl::opt<bool> EnableMergeFunctions(
    "enable-merge-functions", cl::init(false), cl::Hidden,
    cl::desc("Enable function merging as part of the optimization pipeline"));

/// -enable-post-pgo-loop-rotation (hidden): initialized to true.
static cl::opt<bool> EnablePostPGOLoopRotation(
    "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
    cl::desc("Run the loop rotation transformation after PGO instrumentation"));

/// -enable-global-analyses (hidden): initialized to true.
static cl::opt<bool> EnableGlobalAnalyses(
    "enable-global-analyses", cl::init(true), cl::Hidden,
    cl::desc("Enable inter-procedural analyses"));
190 
191 static cl::opt<bool>
192     RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
193                        cl::desc("Run Partial inlinining pass"));
194 
/// -extra-vectorizer-passes (hidden): initialized to false.
static cl::opt<bool> ExtraVectorizerPasses(
    "extra-vectorizer-passes", cl::init(false), cl::Hidden,
    cl::desc("Run cleanup optimization passes after vectorization"));

/// -enable-newgvn (hidden): initialized to false.
static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
                               cl::desc("Run the NewGVN pass"));

/// -enable-loopinterchange (hidden): initialized to false. When set, the
/// function simplification pipelines in this file add LoopInterchangePass to
/// their second loop pass manager.
static cl::opt<bool> EnableLoopInterchange(
    "enable-loopinterchange", cl::init(false), cl::Hidden,
    cl::desc("Enable the experimental LoopInterchange Pass"));

/// -enable-unroll-and-jam (hidden): initialized to false.
static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
                                        cl::init(false), cl::Hidden,
                                        cl::desc("Enable Unroll And Jam Pass"));

/// -enable-loop-flatten (hidden): initialized to false. When set, the
/// function simplification pipelines in this file add LoopFlattenPass to
/// their first loop pass manager.
static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
                                       cl::Hidden,
                                       cl::desc("Enable the LoopFlatten Pass"));

// Experimentally allow loop header duplication. This should allow for better
// optimization at Oz, since loop-idiom recognition can then recognize things
// like memcpy. If this ends up being useful for many targets, we should drop
// this flag and make a code generation option that can be controlled
// independent of the opt level and exposed through the frontend.
static cl::opt<bool> EnableLoopHeaderDuplication(
    "enable-loop-header-duplication", cl::init(false), cl::Hidden,
    cl::desc("Enable loop header duplication at any optimization level"));

/// -enable-dfa-jump-thread (hidden): initialized to false.
static cl::opt<bool>
    EnableDFAJumpThreading("enable-dfa-jump-thread",
                           cl::desc("Enable DFA jump threading"),
                           cl::init(false), cl::Hidden);

// TODO: turn on and remove flag
/// -enable-pgo-force-function-attrs: initialized to false. Unlike most flags
/// in this file it is not marked cl::Hidden.
static cl::opt<bool> EnablePGOForceFunctionAttrs(
    "enable-pgo-force-function-attrs",
    cl::desc("Enable pass to set function attributes based on PGO profiles"),
    cl::init(false));

/// -hot-cold-split: no explicit cl::init and not marked cl::Hidden.
static cl::opt<bool>
    EnableHotColdSplit("hot-cold-split",
                       cl::desc("Enable hot-cold splitting pass"));

/// -ir-outliner (hidden): initialized to false.
static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
                                      cl::Hidden,
                                      cl::desc("Enable ir outliner pass"));

/// -disable-preinline (hidden): initialized to false.
static cl::opt<bool>
    DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
                      cl::desc("Disable pre-instrumentation inliner"));

/// -preinline-threshold (hidden): integer option initialized to 75.
static cl::opt<int> PreInlineThreshold(
    "preinline-threshold", cl::Hidden, cl::init(75),
    cl::desc("Control the amount of inlining in pre-instrumentation inliner "
             "(default = 75)"));

/// -enable-gvn-hoist: no explicit cl::init and not marked cl::Hidden. When
/// set, buildFunctionSimplificationPipeline adds GVNHoistPass.
static cl::opt<bool>
    EnableGVNHoist("enable-gvn-hoist",
                   cl::desc("Enable the GVN hoisting pass (default = off)"));

/// -enable-gvn-sink: no explicit cl::init and not marked cl::Hidden. When
/// set, buildFunctionSimplificationPipeline adds GVNSinkPass followed by a
/// SimplifyCFGPass.
static cl::opt<bool>
    EnableGVNSink("enable-gvn-sink",
                  cl::desc("Enable the GVN sinking pass (default = off)"));

/// -enable-jump-table-to-switch: no explicit cl::init and not marked
/// cl::Hidden. When set, buildFunctionSimplificationPipeline adds
/// JumpTableToSwitchPass.
static cl::opt<bool> EnableJumpTableToSwitch(
    "enable-jump-table-to-switch",
    cl::desc("Enable JumpTableToSwitch pass (default = off)"));

// This option is used in simplifying testing SampleFDO optimizations for
// profile loading.
static cl::opt<bool>
    EnableCHR("enable-chr", cl::init(true), cl::Hidden,
              cl::desc("Enable control height reduction optimization (CHR)"));
268 
269 static cl::opt<bool> FlattenedProfileUsed(
270     "flattened-profile-used", cl::init(false), cl::Hidden,
271     cl::desc("Indicate the sample profile being used is flattened, i.e., "
272              "no inline hierachy exists in the profile"));
273 
/// -enable-order-file-instrumentation (hidden): initialized to false.
static cl::opt<bool> EnableOrderFileInstrumentation(
    "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
    cl::desc("Enable order file instrumentation (default = off)"));

/// -enable-matrix (hidden): initialized to false.
static cl::opt<bool>
    EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
                 cl::desc("Enable lowering of the matrix intrinsics"));

/// -enable-constraint-elimination (hidden): initialized to true. While set,
/// buildFunctionSimplificationPipeline adds ConstraintEliminationPass after
/// ReassociatePass.
static cl::opt<bool> EnableConstraintElimination(
    "enable-constraint-elimination", cl::init(true), cl::Hidden,
    cl::desc(
        "Enable pass to eliminate conditions based on linear constraints"));

/// -attributor-enable={all|module|cgscc|none} (hidden): selects which
/// attributor runs are requested; initialized to AttributorRunOption::NONE.
static cl::opt<AttributorRunOption> AttributorRun(
    "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
    cl::desc("Enable the attributor inter-procedural deduction pass"),
    cl::values(clEnumValN(AttributorRunOption::ALL, "all",
                          "enable all attributor runs"),
               clEnumValN(AttributorRunOption::MODULE, "module",
                          "enable module-wide attributor runs"),
               clEnumValN(AttributorRunOption::CGSCC, "cgscc",
                          "enable call graph SCC attributor runs"),
               clEnumValN(AttributorRunOption::NONE, "none",
                          "disable attributor runs")));

/// -enable-sampled-instrumentation (hidden): initialized to false.
static cl::opt<bool> EnableSampledInstr(
    "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
    cl::desc("Enable profile instrumentation sampling (default = off)"));
/// -enable-loop-versioning-licm (hidden): initialized to false.
static cl::opt<bool> UseLoopVersioningLICM(
    "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
    cl::desc("Enable the experimental Loop Versioning LICM pass"));
305 
// Declarations only (extern): these command-line options are not defined in
// the visible part of this file.
namespace llvm {
extern cl::opt<bool> EnableMemProfContextDisambiguation;

extern cl::opt<bool> EnableInferAlignmentPass;
} // namespace llvm
311 
PipelineTuningOptions()312 PipelineTuningOptions::PipelineTuningOptions() {
313   LoopInterleaving = true;
314   LoopVectorization = true;
315   SLPVectorization = false;
316   LoopUnrolling = true;
317   ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
318   LicmMssaOptCap = SetLicmMssaOptCap;
319   LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
320   CallGraphProfile = true;
321   UnifiedLTO = false;
322   MergeFunctions = EnableMergeFunctions;
323   InlinerThreshold = -1;
324   EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
325 }
326 
// Declaration only (extern): this option is not defined in the visible part
// of this file.
namespace llvm {
extern cl::opt<unsigned> MaxDevirtIterations;
} // namespace llvm
330 
invokePeepholeEPCallbacks(FunctionPassManager & FPM,OptimizationLevel Level)331 void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
332                                             OptimizationLevel Level) {
333   for (auto &C : PeepholeEPCallbacks)
334     C(FPM, Level);
335 }
invokeLateLoopOptimizationsEPCallbacks(LoopPassManager & LPM,OptimizationLevel Level)336 void PassBuilder::invokeLateLoopOptimizationsEPCallbacks(
337     LoopPassManager &LPM, OptimizationLevel Level) {
338   for (auto &C : LateLoopOptimizationsEPCallbacks)
339     C(LPM, Level);
340 }
invokeLoopOptimizerEndEPCallbacks(LoopPassManager & LPM,OptimizationLevel Level)341 void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM,
342                                                     OptimizationLevel Level) {
343   for (auto &C : LoopOptimizerEndEPCallbacks)
344     C(LPM, Level);
345 }
invokeScalarOptimizerLateEPCallbacks(FunctionPassManager & FPM,OptimizationLevel Level)346 void PassBuilder::invokeScalarOptimizerLateEPCallbacks(
347     FunctionPassManager &FPM, OptimizationLevel Level) {
348   for (auto &C : ScalarOptimizerLateEPCallbacks)
349     C(FPM, Level);
350 }
invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager & CGPM,OptimizationLevel Level)351 void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM,
352                                                       OptimizationLevel Level) {
353   for (auto &C : CGSCCOptimizerLateEPCallbacks)
354     C(CGPM, Level);
355 }
invokeVectorizerStartEPCallbacks(FunctionPassManager & FPM,OptimizationLevel Level)356 void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
357                                                    OptimizationLevel Level) {
358   for (auto &C : VectorizerStartEPCallbacks)
359     C(FPM, Level);
360 }
invokeOptimizerEarlyEPCallbacks(ModulePassManager & MPM,OptimizationLevel Level)361 void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
362                                                   OptimizationLevel Level) {
363   for (auto &C : OptimizerEarlyEPCallbacks)
364     C(MPM, Level);
365 }
invokeOptimizerLastEPCallbacks(ModulePassManager & MPM,OptimizationLevel Level)366 void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
367                                                  OptimizationLevel Level) {
368   for (auto &C : OptimizerLastEPCallbacks)
369     C(MPM, Level);
370 }
invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager & MPM,OptimizationLevel Level)371 void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(
372     ModulePassManager &MPM, OptimizationLevel Level) {
373   for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
374     C(MPM, Level);
375 }
invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager & MPM,OptimizationLevel Level)376 void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks(
377     ModulePassManager &MPM, OptimizationLevel Level) {
378   for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
379     C(MPM, Level);
380 }
invokePipelineStartEPCallbacks(ModulePassManager & MPM,OptimizationLevel Level)381 void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM,
382                                                  OptimizationLevel Level) {
383   for (auto &C : PipelineStartEPCallbacks)
384     C(MPM, Level);
385 }
invokePipelineEarlySimplificationEPCallbacks(ModulePassManager & MPM,OptimizationLevel Level)386 void PassBuilder::invokePipelineEarlySimplificationEPCallbacks(
387     ModulePassManager &MPM, OptimizationLevel Level) {
388   for (auto &C : PipelineEarlySimplificationEPCallbacks)
389     C(MPM, Level);
390 }
391 
392 // Helper to add AnnotationRemarksPass.
addAnnotationRemarksPass(ModulePassManager & MPM)393 static void addAnnotationRemarksPass(ModulePassManager &MPM) {
394   MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
395 }
396 
397 // Helper to check if the current compilation phase is preparing for LTO
isLTOPreLink(ThinOrFullLTOPhase Phase)398 static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
399   return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
400          Phase == ThinOrFullLTOPhase::FullLTOPreLink;
401 }
402 
// TODO: Investigate the cost/benefit of tail call elimination on debugging.
/// Builds the function simplification pipeline for optimization levels whose
/// speedup level is 1; buildFunctionSimplificationPipeline dispatches here in
/// that case.
///
/// The order in which passes are added is the order of the resulting
/// pipeline, so statements in this function must not be reordered casually.
///
/// \param Level forwarded to every extension-point callback and, through
///        getSpeedupLevel(), to LoopFullUnrollPass.
/// \param Phase the LTO phase being compiled for. It decides the second
///        argument of LoopRotatePass (via isLTOPreLink) and, together with
///        PGOOpt, whether full loop unrolling is scheduled at all.
/// \returns the populated FunctionPassManager.
FunctionPassManager
PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
                                                   ThinOrFullLTOPhase Phase) {

  FunctionPassManager FPM;

  // Only count visits when statistics collection is enabled.
  if (AreStatisticsEnabled())
    FPM.addPass(CountVisitsPass());

  // Form SSA out of local memory accesses after breaking apart aggregates into
  // scalars.
  FPM.addPass(SROAPass(SROAOptions::ModifyCFG));

  // Catch trivial redundancies
  FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));

  // Hoisting of scalars and load expressions.
  // NOTE(review): no hoisting pass is added here; what follows is SimplifyCFG
  // and InstCombine. The same comment precedes GVNHoistPass in
  // buildFunctionSimplificationPipeline, so it looks stale in this function.
  FPM.addPass(
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  FPM.addPass(InstCombinePass());

  FPM.addPass(LibCallsShrinkWrapPass());

  // Extension point: let registered peephole callbacks add their passes.
  invokePeepholeEPCallbacks(FPM, Level);

  FPM.addPass(
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

  // Form canonically associated expression trees, and simplify the trees using
  // basic mathematical properties. For example, this will form (nearly)
  // minimal multiplication trees.
  FPM.addPass(ReassociatePass());

  // Add the primary loop simplification pipeline.
  // FIXME: Currently this is split into two loop pass pipelines because we run
  // some function passes in between them. These can and should be removed
  // and/or replaced by scheduling the loop pass equivalents in the correct
  // positions. But those equivalent passes aren't powerful enough yet.
  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
  // used. We have `LoopSimplifyCFGPass`, which isn't powerful enough yet to
  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
  // `LoopInstSimplify`.
  LoopPassManager LPM1, LPM2;

  // Simplify the loop body. We do this initially to clean up after other loop
  // passes run, either when iterating on a loop or on inner loops with
  // implications on the outer loop.
  LPM1.addPass(LoopInstSimplifyPass());
  LPM1.addPass(LoopSimplifyCFGPass());

  // Try to remove as much code from the loop header as possible,
  // to reduce amount of IR that will have to be duplicated. However,
  // do not perform speculative hoisting the first time as LICM
  // will destroy metadata that may not need to be destroyed if run
  // after loop rotation.
  // TODO: Investigate promotion cap for O1.
  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                        /*AllowSpeculation=*/false));

  // NOTE(review): `true` here appears to *enable* header duplication: the
  // -O2+ pipeline passes `false` for this same argument only at -Oz, where it
  // wants duplication disabled. Confirm against LoopRotatePass's constructor.
  LPM1.addPass(LoopRotatePass(/*EnableHeaderDuplication=*/ true,
                              isLTOPreLink(Phase)));
  // TODO: Investigate promotion cap for O1.
  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                        /*AllowSpeculation=*/true));
  LPM1.addPass(SimpleLoopUnswitchPass());
  if (EnableLoopFlatten)
    LPM1.addPass(LoopFlattenPass());

  LPM2.addPass(LoopIdiomRecognizePass());
  LPM2.addPass(IndVarSimplifyPass());

  // Extension point: late loop optimizations go after IndVarSimplify and
  // before LoopDeletion.
  invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);

  LPM2.addPass(LoopDeletionPass());

  if (EnableLoopInterchange)
    LPM2.addPass(LoopInterchangePass());

  // Do not enable unrolling in the ThinLTO pre-link phase during sample PGO
  // because it changes the IR and makes profile annotation in the backend
  // compile inaccurate. The normal unroller doesn't pay attention to forced
  // full unroll attributes so we need to make sure and allow the full unroll
  // pass to pay attention to it.
  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
      PGOOpt->Action != PGOOptions::SampleUse)
    LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
                                    /* OnlyWhenForced= */ !PTO.LoopUnrolling,
                                    PTO.ForgetAllSCEVInLoopUnroll));

  // Extension point: last chance to add loop passes to LPM2.
  invokeLoopOptimizerEndEPCallbacks(LPM2, Level);

  // Splice the two loop pipelines into the function pipeline, with
  // SimplifyCFG and InstCombine running between them.
  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
                                              /*UseMemorySSA=*/true,
                                              /*UseBlockFrequencyInfo=*/true));
  FPM.addPass(
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  FPM.addPass(InstCombinePass());
  // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
  // *All* loop passes must preserve it, in order to be able to use it.
  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
                                              /*UseMemorySSA=*/false,
                                              /*UseBlockFrequencyInfo=*/false));

  // Delete small array after loop unroll.
  FPM.addPass(SROAPass(SROAOptions::ModifyCFG));

  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
  FPM.addPass(MemCpyOptPass());

  // Sparse conditional constant propagation.
  // FIXME: It isn't clear why we do this *after* loop passes rather than
  // before...
  FPM.addPass(SCCPPass());

  // Delete dead bit computations (instcombine runs after to fold away the dead
  // computations, and then ADCE will run later to exploit any new DCE
  // opportunities that creates).
  FPM.addPass(BDCEPass());

  // Run instcombine after redundancy and dead bit elimination to exploit
  // opportunities opened up by them.
  FPM.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(FPM, Level);

  FPM.addPass(CoroElidePass());

  // Extension point: late scalar optimizations run before the final cleanup.
  invokeScalarOptimizerLateEPCallbacks(FPM, Level);

  // Finally, do an expensive DCE pass to catch all the dead code exposed by
  // the simplifications and basic cleanup after all the simplifications.
  // TODO: Investigate if this is too expensive.
  FPM.addPass(ADCEPass());
  FPM.addPass(
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  FPM.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(FPM, Level);

  return FPM;
}
543 
544 FunctionPassManager
buildFunctionSimplificationPipeline(OptimizationLevel Level,ThinOrFullLTOPhase Phase)545 PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
546                                                  ThinOrFullLTOPhase Phase) {
547   assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
548 
549   // The O1 pipeline has a separate pipeline creation function to simplify
550   // construction readability.
551   if (Level.getSpeedupLevel() == 1)
552     return buildO1FunctionSimplificationPipeline(Level, Phase);
553 
554   FunctionPassManager FPM;
555 
556   if (AreStatisticsEnabled())
557     FPM.addPass(CountVisitsPass());
558 
559   // Form SSA out of local memory accesses after breaking apart aggregates into
560   // scalars.
561   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
562 
563   // Catch trivial redundancies
564   FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
565   if (EnableKnowledgeRetention)
566     FPM.addPass(AssumeSimplifyPass());
567 
568   // Hoisting of scalars and load expressions.
569   if (EnableGVNHoist)
570     FPM.addPass(GVNHoistPass());
571 
572   // Global value numbering based sinking.
573   if (EnableGVNSink) {
574     FPM.addPass(GVNSinkPass());
575     FPM.addPass(
576         SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
577   }
578 
579   // Speculative execution if the target has divergent branches; otherwise nop.
580   FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
581 
582   // Optimize based on known information about branches, and cleanup afterward.
583   FPM.addPass(JumpThreadingPass());
584   FPM.addPass(CorrelatedValuePropagationPass());
585 
586   // Jump table to switch conversion.
587   if (EnableJumpTableToSwitch)
588     FPM.addPass(JumpTableToSwitchPass());
589 
590   FPM.addPass(
591       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
592   FPM.addPass(InstCombinePass());
593   FPM.addPass(AggressiveInstCombinePass());
594 
595   if (!Level.isOptimizingForSize())
596     FPM.addPass(LibCallsShrinkWrapPass());
597 
598   invokePeepholeEPCallbacks(FPM, Level);
599 
600   // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
601   // using the size value profile. Don't perform this when optimizing for size.
602   if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
603       !Level.isOptimizingForSize())
604     FPM.addPass(PGOMemOPSizeOpt());
605 
606   FPM.addPass(TailCallElimPass());
607   FPM.addPass(
608       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
609 
610   // Form canonically associated expression trees, and simplify the trees using
611   // basic mathematical properties. For example, this will form (nearly)
612   // minimal multiplication trees.
613   FPM.addPass(ReassociatePass());
614 
615   if (EnableConstraintElimination)
616     FPM.addPass(ConstraintEliminationPass());
617 
618   // Add the primary loop simplification pipeline.
619   // FIXME: Currently this is split into two loop pass pipelines because we run
620   // some function passes in between them. These can and should be removed
621   // and/or replaced by scheduling the loop pass equivalents in the correct
622   // positions. But those equivalent passes aren't powerful enough yet.
623   // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
624   // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
625   // fully replace `SimplifyCFGPass`, and the closest to the other we have is
626   // `LoopInstSimplify`.
627   LoopPassManager LPM1, LPM2;
628 
629   // Simplify the loop body. We do this initially to clean up after other loop
630   // passes run, either when iterating on a loop or on inner loops with
631   // implications on the outer loop.
632   LPM1.addPass(LoopInstSimplifyPass());
633   LPM1.addPass(LoopSimplifyCFGPass());
634 
635   // Try to remove as much code from the loop header as possible,
636   // to reduce amount of IR that will have to be duplicated. However,
637   // do not perform speculative hoisting the first time as LICM
638   // will destroy metadata that may not need to be destroyed if run
639   // after loop rotation.
640   // TODO: Investigate promotion cap for O1.
641   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
642                         /*AllowSpeculation=*/false));
643 
644   // Disable header duplication in loop rotation at -Oz.
645   LPM1.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
646                                   Level != OptimizationLevel::Oz,
647                               isLTOPreLink(Phase)));
648   // TODO: Investigate promotion cap for O1.
649   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
650                         /*AllowSpeculation=*/true));
651   LPM1.addPass(
652       SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
653   if (EnableLoopFlatten)
654     LPM1.addPass(LoopFlattenPass());
655 
656   LPM2.addPass(LoopIdiomRecognizePass());
657   LPM2.addPass(IndVarSimplifyPass());
658 
659   {
660     ExtraSimpleLoopUnswitchPassManager ExtraPasses;
661     ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
662                                                OptimizationLevel::O3));
663     LPM2.addPass(std::move(ExtraPasses));
664   }
665 
666   invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);
667 
668   LPM2.addPass(LoopDeletionPass());
669 
670   if (EnableLoopInterchange)
671     LPM2.addPass(LoopInterchangePass());
672 
673   // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
674   // because it changes the IR, which makes profile annotation in the backend
675   // compile inaccurate. The normal unroller doesn't pay attention to forced
676   // full unroll attributes so we need to make sure and allow the full unroll
677   // pass to pay attention to it.
678   if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
679       PGOOpt->Action != PGOOptions::SampleUse)
680     LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
681                                     /* OnlyWhenForced= */ !PTO.LoopUnrolling,
682                                     PTO.ForgetAllSCEVInLoopUnroll));
683 
684   invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
685 
686   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
687                                               /*UseMemorySSA=*/true,
688                                               /*UseBlockFrequencyInfo=*/true));
689   FPM.addPass(
690       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
691   FPM.addPass(InstCombinePass());
692   // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
693   // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
694   // *All* loop passes must preserve it, in order to be able to use it.
695   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
696                                               /*UseMemorySSA=*/false,
697                                               /*UseBlockFrequencyInfo=*/false));
698 
699   // Delete small array after loop unroll.
700   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
701 
702   // Try vectorization/scalarization transforms that are both improvements
703   // themselves and can allow further folds with GVN and InstCombine.
704   FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
705 
706   // Eliminate redundancies.
707   FPM.addPass(MergedLoadStoreMotionPass());
708   if (RunNewGVN)
709     FPM.addPass(NewGVNPass());
710   else
711     FPM.addPass(GVNPass());
712 
713   // Sparse conditional constant propagation.
714   // FIXME: It isn't clear why we do this *after* loop passes rather than
715   // before...
716   FPM.addPass(SCCPPass());
717 
718   // Delete dead bit computations (instcombine runs after to fold away the dead
719   // computations, and then ADCE will run later to exploit any new DCE
720   // opportunities that creates).
721   FPM.addPass(BDCEPass());
722 
723   // Run instcombine after redundancy and dead bit elimination to exploit
724   // opportunities opened up by them.
725   FPM.addPass(InstCombinePass());
726   invokePeepholeEPCallbacks(FPM, Level);
727 
728   // Re-consider control flow based optimizations after redundancy elimination,
729   // redo DCE, etc.
730   if (EnableDFAJumpThreading)
731     FPM.addPass(DFAJumpThreadingPass());
732 
733   FPM.addPass(JumpThreadingPass());
734   FPM.addPass(CorrelatedValuePropagationPass());
735 
736   // Finally, do an expensive DCE pass to catch all the dead code exposed by
737   // the simplifications and basic cleanup after all the simplifications.
738   // TODO: Investigate if this is too expensive.
739   FPM.addPass(ADCEPass());
740 
741   // Specially optimize memory movement as it doesn't look like dataflow in SSA.
742   FPM.addPass(MemCpyOptPass());
743 
744   FPM.addPass(DSEPass());
745   FPM.addPass(MoveAutoInitPass());
746 
747   FPM.addPass(createFunctionToLoopPassAdaptor(
748       LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
749                /*AllowSpeculation=*/true),
750       /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
751 
752   FPM.addPass(CoroElidePass());
753 
754   invokeScalarOptimizerLateEPCallbacks(FPM, Level);
755 
756   FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
757                                   .convertSwitchRangeToICmp(true)
758                                   .hoistCommonInsts(true)
759                                   .sinkCommonInsts(true)));
760   FPM.addPass(InstCombinePass());
761   invokePeepholeEPCallbacks(FPM, Level);
762 
763   return FPM;
764 }
765 
addRequiredLTOPreLinkPasses(ModulePassManager & MPM)766 void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
767   MPM.addPass(CanonicalizeAliasesPass());
768   MPM.addPass(NameAnonGlobalPass());
769 }
770 
addPreInlinerPasses(ModulePassManager & MPM,OptimizationLevel Level,ThinOrFullLTOPhase LTOPhase)771 void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
772                                       OptimizationLevel Level,
773                                       ThinOrFullLTOPhase LTOPhase) {
774   assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
775   if (DisablePreInliner)
776     return;
777   InlineParams IP;
778 
779   IP.DefaultThreshold = PreInlineThreshold;
780 
781   // FIXME: The hint threshold has the same value used by the regular inliner
782   // when not optimzing for size. This should probably be lowered after
783   // performance testing.
784   // FIXME: this comment is cargo culted from the old pass manager, revisit).
785   IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
786   ModuleInlinerWrapperPass MIWP(
787       IP, /* MandatoryFirst */ true,
788       InlineContext{LTOPhase, InlinePass::EarlyInliner});
789   CGSCCPassManager &CGPipeline = MIWP.getPM();
790 
791   FunctionPassManager FPM;
792   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
793   FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
794   FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
795       true)));                    // Merge & remove basic blocks.
796   FPM.addPass(InstCombinePass()); // Combine silly sequences.
797   invokePeepholeEPCallbacks(FPM, Level);
798 
799   CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
800       std::move(FPM), PTO.EagerlyInvalidateAnalyses));
801 
802   MPM.addPass(std::move(MIWP));
803 
804   // Delete anything that is now dead to make sure that we don't instrument
805   // dead code. Instrumentation can end up keeping dead code around and
806   // dramatically increase code size.
807   MPM.addPass(GlobalDCEPass());
808 }
809 
addPostPGOLoopRotation(ModulePassManager & MPM,OptimizationLevel Level)810 void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
811                                          OptimizationLevel Level) {
812   if (EnablePostPGOLoopRotation) {
813     // Disable header duplication in loop rotation at -Oz.
814     MPM.addPass(createModuleToFunctionPassAdaptor(
815         createFunctionToLoopPassAdaptor(
816             LoopRotatePass(EnableLoopHeaderDuplication ||
817                            Level != OptimizationLevel::Oz),
818             /*UseMemorySSA=*/false,
819             /*UseBlockFrequencyInfo=*/false),
820         PTO.EagerlyInvalidateAnalyses));
821   }
822 }
823 
addPGOInstrPasses(ModulePassManager & MPM,OptimizationLevel Level,bool RunProfileGen,bool IsCS,bool AtomicCounterUpdate,std::string ProfileFile,std::string ProfileRemappingFile,IntrusiveRefCntPtr<vfs::FileSystem> FS)824 void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
825                                     OptimizationLevel Level, bool RunProfileGen,
826                                     bool IsCS, bool AtomicCounterUpdate,
827                                     std::string ProfileFile,
828                                     std::string ProfileRemappingFile,
829                                     IntrusiveRefCntPtr<vfs::FileSystem> FS) {
830   assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
831 
832   if (!RunProfileGen) {
833     assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
834     MPM.addPass(
835         PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
836     // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
837     // RequireAnalysisPass for PSI before subsequent non-module passes.
838     MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
839     return;
840   }
841 
842   // Perform PGO instrumentation.
843   MPM.addPass(PGOInstrumentationGen(IsCS));
844 
845   addPostPGOLoopRotation(MPM, Level);
846   // Add the profile lowering pass.
847   InstrProfOptions Options;
848   if (!ProfileFile.empty())
849     Options.InstrProfileOutput = ProfileFile;
850   // Do counter promotion at Level greater than O0.
851   Options.DoCounterPromotion = true;
852   Options.UseBFIInPromotion = IsCS;
853   if (EnableSampledInstr) {
854     Options.Sampling = true;
855     // With sampling, there is little beneifit to enable counter promotion.
856     // But note that sampling does work with counter promotion.
857     Options.DoCounterPromotion = false;
858   }
859   Options.Atomic = AtomicCounterUpdate;
860   MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
861 }
862 
addPGOInstrPassesForO0(ModulePassManager & MPM,bool RunProfileGen,bool IsCS,bool AtomicCounterUpdate,std::string ProfileFile,std::string ProfileRemappingFile,IntrusiveRefCntPtr<vfs::FileSystem> FS)863 void PassBuilder::addPGOInstrPassesForO0(
864     ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
865     bool AtomicCounterUpdate, std::string ProfileFile,
866     std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
867   if (!RunProfileGen) {
868     assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
869     MPM.addPass(
870         PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
871     // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
872     // RequireAnalysisPass for PSI before subsequent non-module passes.
873     MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
874     return;
875   }
876 
877   // Perform PGO instrumentation.
878   MPM.addPass(PGOInstrumentationGen(IsCS));
879   // Add the profile lowering pass.
880   InstrProfOptions Options;
881   if (!ProfileFile.empty())
882     Options.InstrProfileOutput = ProfileFile;
883   // Do not do counter promotion at O0.
884   Options.DoCounterPromotion = false;
885   Options.UseBFIInPromotion = IsCS;
886   Options.Atomic = AtomicCounterUpdate;
887   MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
888 }
889 
getInlineParamsFromOptLevel(OptimizationLevel Level)890 static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
891   return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
892 }
893 
894 ModuleInlinerWrapperPass
buildInlinerPipeline(OptimizationLevel Level,ThinOrFullLTOPhase Phase)895 PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
896                                   ThinOrFullLTOPhase Phase) {
897   InlineParams IP;
898   if (PTO.InlinerThreshold == -1)
899     IP = getInlineParamsFromOptLevel(Level);
900   else
901     IP = getInlineParams(PTO.InlinerThreshold);
902   // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
903   // disable hot callsite inline (as much as possible [1]) because it makes
904   // profile annotation in the backend inaccurate.
905   //
906   // [1] Note the cost of a function could be below zero due to erased
907   // prologue / epilogue.
908   if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
909       PGOOpt->Action == PGOOptions::SampleUse)
910     IP.HotCallSiteThreshold = 0;
911 
912   if (PGOOpt)
913     IP.EnableDeferral = EnablePGOInlineDeferral;
914 
915   ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
916                                 InlineContext{Phase, InlinePass::CGSCCInliner},
917                                 UseInlineAdvisor, MaxDevirtIterations);
918 
919   // Require the GlobalsAA analysis for the module so we can query it within
920   // the CGSCC pipeline.
921   if (EnableGlobalAnalyses) {
922     MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
923     // Invalidate AAManager so it can be recreated and pick up the newly
924     // available GlobalsAA.
925     MIWP.addModulePass(
926         createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
927   }
928 
929   // Require the ProfileSummaryAnalysis for the module so we can query it within
930   // the inliner pass.
931   MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
932 
933   // Now begin the main postorder CGSCC pipeline.
934   // FIXME: The current CGSCC pipeline has its origins in the legacy pass
935   // manager and trying to emulate its precise behavior. Much of this doesn't
936   // make a lot of sense and we should revisit the core CGSCC structure.
937   CGSCCPassManager &MainCGPipeline = MIWP.getPM();
938 
939   // Note: historically, the PruneEH pass was run first to deduce nounwind and
940   // generally clean up exception handling overhead. It isn't clear this is
941   // valuable as the inliner doesn't currently care whether it is inlining an
942   // invoke or a call.
943 
944   if (AttributorRun & AttributorRunOption::CGSCC)
945     MainCGPipeline.addPass(AttributorCGSCCPass());
946 
947   // Deduce function attributes. We do another run of this after the function
948   // simplification pipeline, so this only needs to run when it could affect the
949   // function simplification pipeline, which is only the case with recursive
950   // functions.
951   MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
952 
953   // When at O3 add argument promotion to the pass pipeline.
954   // FIXME: It isn't at all clear why this should be limited to O3.
955   if (Level == OptimizationLevel::O3)
956     MainCGPipeline.addPass(ArgumentPromotionPass());
957 
958   // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
959   // there are no OpenMP runtime calls present in the module.
960   if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
961     MainCGPipeline.addPass(OpenMPOptCGSCCPass());
962 
963   invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
964 
965   // Add the core function simplification pipeline nested inside the
966   // CGSCC walk.
967   MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
968       buildFunctionSimplificationPipeline(Level, Phase),
969       PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
970 
971   // Finally, deduce any function attributes based on the fully simplified
972   // function.
973   MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
974 
975   // Mark that the function is fully simplified and that it shouldn't be
976   // simplified again if we somehow revisit it due to CGSCC mutations unless
977   // it's been modified since.
978   MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
979       RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>()));
980 
981   MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
982 
983   // Make sure we don't affect potential future NoRerun CGSCC adaptors.
984   MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
985       InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));
986 
987   return MIWP;
988 }
989 
990 ModulePassManager
buildModuleInlinerPipeline(OptimizationLevel Level,ThinOrFullLTOPhase Phase)991 PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
992                                         ThinOrFullLTOPhase Phase) {
993   ModulePassManager MPM;
994 
995   InlineParams IP = getInlineParamsFromOptLevel(Level);
996   // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
997   // disable hot callsite inline (as much as possible [1]) because it makes
998   // profile annotation in the backend inaccurate.
999   //
1000   // [1] Note the cost of a function could be below zero due to erased
1001   // prologue / epilogue.
1002   if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
1003       PGOOpt->Action == PGOOptions::SampleUse)
1004     IP.HotCallSiteThreshold = 0;
1005 
1006   if (PGOOpt)
1007     IP.EnableDeferral = EnablePGOInlineDeferral;
1008 
1009   // The inline deferral logic is used to avoid losing some
1010   // inlining chance in future. It is helpful in SCC inliner, in which
1011   // inlining is processed in bottom-up order.
1012   // While in module inliner, the inlining order is a priority-based order
1013   // by default. The inline deferral is unnecessary there. So we disable the
1014   // inline deferral logic in module inliner.
1015   IP.EnableDeferral = false;
1016 
1017   MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor, Phase));
1018 
1019   MPM.addPass(createModuleToFunctionPassAdaptor(
1020       buildFunctionSimplificationPipeline(Level, Phase),
1021       PTO.EagerlyInvalidateAnalyses));
1022 
1023   MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
1024       CoroSplitPass(Level != OptimizationLevel::O0)));
1025 
1026   return MPM;
1027 }
1028 
1029 ModulePassManager
buildModuleSimplificationPipeline(OptimizationLevel Level,ThinOrFullLTOPhase Phase)1030 PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
1031                                                ThinOrFullLTOPhase Phase) {
1032   assert(Level != OptimizationLevel::O0 &&
1033          "Should not be used for O0 pipeline");
1034 
1035   assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink &&
1036          "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1037 
1038   ModulePassManager MPM;
1039 
1040   // Place pseudo probe instrumentation as the first pass of the pipeline to
1041   // minimize the impact of optimization changes.
1042   if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1043       Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
1044     MPM.addPass(SampleProfileProbePass(TM));
1045 
1046   bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1047 
1048   // In ThinLTO mode, when flattened profile is used, all the available
1049   // profile information will be annotated in PreLink phase so there is
1050   // no need to load the profile again in PostLink.
1051   bool LoadSampleProfile =
1052       HasSampleProfile &&
1053       !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
1054 
1055   // During the ThinLTO backend phase we perform early indirect call promotion
1056   // here, before globalopt. Otherwise imported available_externally functions
1057   // look unreferenced and are removed. If we are going to load the sample
1058   // profile then defer until later.
1059   // TODO: See if we can move later and consolidate with the location where
1060   // we perform ICP when we are loading a sample profile.
1061   // TODO: We pass HasSampleProfile (whether there was a sample profile file
1062   // passed to the compile) to the SamplePGO flag of ICP. This is used to
1063   // determine whether the new direct calls are annotated with prof metadata.
1064   // Ideally this should be determined from whether the IR is annotated with
1065   // sample profile, and not whether the a sample profile was provided on the
1066   // command line. E.g. for flattened profiles where we will not be reloading
1067   // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1068   // provide the sample profile file.
1069   if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1070     MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1071 
1072   // Create an early function pass manager to cleanup the output of the
1073   // frontend. Not necessary with LTO post link pipelines since the pre link
1074   // pipeline already cleaned up the frontend output.
1075   if (Phase != ThinOrFullLTOPhase::ThinLTOPostLink) {
1076     // Do basic inference of function attributes from known properties of system
1077     // libraries and other oracles.
1078     MPM.addPass(InferFunctionAttrsPass());
1079     MPM.addPass(CoroEarlyPass());
1080 
1081     FunctionPassManager EarlyFPM;
1082     EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1083     // Lower llvm.expect to metadata before attempting transforms.
1084     // Compare/branch metadata may alter the behavior of passes like
1085     // SimplifyCFG.
1086     EarlyFPM.addPass(LowerExpectIntrinsicPass());
1087     EarlyFPM.addPass(SimplifyCFGPass());
1088     EarlyFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
1089     EarlyFPM.addPass(EarlyCSEPass());
1090     if (Level == OptimizationLevel::O3)
1091       EarlyFPM.addPass(CallSiteSplittingPass());
1092     MPM.addPass(createModuleToFunctionPassAdaptor(
1093         std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1094   }
1095 
1096   if (LoadSampleProfile) {
1097     // Annotate sample profile right after early FPM to ensure freshness of
1098     // the debug info.
1099     MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1100                                         PGOOpt->ProfileRemappingFile, Phase));
1101     // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1102     // RequireAnalysisPass for PSI before subsequent non-module passes.
1103     MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1104     // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1105     // for the profile annotation to be accurate in the LTO backend.
1106     if (!isLTOPreLink(Phase))
1107       // We perform early indirect call promotion here, before globalopt.
1108       // This is important for the ThinLTO backend phase because otherwise
1109       // imported available_externally functions look unreferenced and are
1110       // removed.
1111       MPM.addPass(
1112           PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1113   }
1114 
1115   // Try to perform OpenMP specific optimizations on the module. This is a
1116   // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1117   MPM.addPass(OpenMPOptPass());
1118 
1119   if (AttributorRun & AttributorRunOption::MODULE)
1120     MPM.addPass(AttributorPass());
1121 
1122   // Lower type metadata and the type.test intrinsic in the ThinLTO
1123   // post link pipeline after ICP. This is to enable usage of the type
1124   // tests in ICP sequences.
1125   if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
1126     MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1127 
1128   invokePipelineEarlySimplificationEPCallbacks(MPM, Level);
1129 
1130   // Interprocedural constant propagation now that basic cleanup has occurred
1131   // and prior to optimizing globals.
1132   // FIXME: This position in the pipeline hasn't been carefully considered in
1133   // years, it should be re-analyzed.
1134   MPM.addPass(IPSCCPPass(
1135               IPSCCPOptions(/*AllowFuncSpec=*/
1136                             Level != OptimizationLevel::Os &&
1137                             Level != OptimizationLevel::Oz &&
1138                             !isLTOPreLink(Phase))));
1139 
1140   // Attach metadata to indirect call sites indicating the set of functions
1141   // they may target at run-time. This should follow IPSCCP.
1142   MPM.addPass(CalledValuePropagationPass());
1143 
1144   // Optimize globals to try and fold them into constants.
1145   MPM.addPass(GlobalOptPass());
1146 
1147   // Create a small function pass pipeline to cleanup after all the global
1148   // optimizations.
1149   FunctionPassManager GlobalCleanupPM;
1150   // FIXME: Should this instead by a run of SROA?
1151   GlobalCleanupPM.addPass(PromotePass());
1152   GlobalCleanupPM.addPass(InstCombinePass());
1153   invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1154   GlobalCleanupPM.addPass(
1155       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1156   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1157                                                 PTO.EagerlyInvalidateAnalyses));
1158 
1159   // We already asserted this happens in non-FullLTOPostLink earlier.
1160   const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1161   const bool IsPGOPreLink = PGOOpt && IsPreLink;
1162   const bool IsPGOInstrGen =
1163       IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1164   const bool IsPGOInstrUse =
1165       IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1166   const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1167   // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1168   // enable ctx profiling from the frontend.
1169   assert(
1170       !(IsPGOInstrGen && PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) &&
1171       "Enabling both instrumented FDO and contextual instrumentation is not "
1172       "supported.");
1173   // Enable contextual profiling instrumentation.
1174   const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&
1175                             PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
1176 
1177   if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen)
1178     addPreInlinerPasses(MPM, Level, Phase);
1179 
1180   // Add all the requested passes for instrumentation PGO, if requested.
1181   if (IsPGOInstrGen || IsPGOInstrUse) {
1182     addPGOInstrPasses(MPM, Level,
1183                       /*RunProfileGen=*/IsPGOInstrGen,
1184                       /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1185                       PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1186                       PGOOpt->FS);
1187   } else if (IsCtxProfGen) {
1188     MPM.addPass(PGOInstrumentationGen(false));
1189     addPostPGOLoopRotation(MPM, Level);
1190     MPM.addPass(PGOCtxProfLoweringPass());
1191   }
1192 
1193   if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1194     MPM.addPass(PGOIndirectCallPromotion(false, false));
1195 
1196   if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1197     MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1198                                                EnableSampledInstr));
1199 
1200   if (IsMemprofUse)
1201     MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
1202 
1203   // Synthesize function entry counts for non-PGO compilation.
1204   if (EnableSyntheticCounts && !PGOOpt)
1205     MPM.addPass(SyntheticCountsPropagation());
1206 
1207   if (EnablePGOForceFunctionAttrs && PGOOpt)
1208     MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1209 
1210   MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1211 
1212   if (EnableModuleInliner)
1213     MPM.addPass(buildModuleInlinerPipeline(Level, Phase));
1214   else
1215     MPM.addPass(buildInlinerPipeline(Level, Phase));
1216 
1217   // Remove any dead arguments exposed by cleanups, constant folding globals,
1218   // and argument promotion.
1219   MPM.addPass(DeadArgumentEliminationPass());
1220 
1221   MPM.addPass(CoroCleanupPass());
1222 
1223   // Optimize globals now that functions are fully simplified.
1224   MPM.addPass(GlobalOptPass());
1225   MPM.addPass(GlobalDCEPass());
1226 
1227   return MPM;
1228 }
1229 
1230 /// TODO: Should LTO cause any differences to this set of passes?
addVectorPasses(OptimizationLevel Level,FunctionPassManager & FPM,bool IsFullLTO)1231 void PassBuilder::addVectorPasses(OptimizationLevel Level,
1232                                   FunctionPassManager &FPM, bool IsFullLTO) {
1233   FPM.addPass(LoopVectorizePass(
1234       LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
1235 
1236   if (EnableInferAlignmentPass)
1237     FPM.addPass(InferAlignmentPass());
1238   if (IsFullLTO) {
1239     // The vectorizer may have significantly shortened a loop body; unroll
1240     // again. Unroll small loops to hide loop backedge latency and saturate any
1241     // parallel execution resources of an out-of-order processor. We also then
1242     // need to clean up redundancies and loop invariant code.
1243     // FIXME: It would be really good to use a loop-integrated instruction
1244     // combiner for cleanup here so that the unrolling and LICM can be pipelined
1245     // across the loop nests.
1246     // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1247     if (EnableUnrollAndJam && PTO.LoopUnrolling)
1248       FPM.addPass(createFunctionToLoopPassAdaptor(
1249           LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1250     FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1251         Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1252         PTO.ForgetAllSCEVInLoopUnroll)));
1253     FPM.addPass(WarnMissedTransformationsPass());
1254     // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1255     // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1256     // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1257     // NOTE: we are very late in the pipeline, and we don't have any LICM
1258     // or SimplifyCFG passes scheduled after us, that would clean up
1259     // the CFG mess this may create if allowed to modify CFG, so forbid that.
1260     FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1261   }
1262 
1263   if (!IsFullLTO) {
1264     // Eliminate loads by forwarding stores from the previous iteration to loads
1265     // of the current iteration.
1266     FPM.addPass(LoopLoadEliminationPass());
1267   }
1268   // Cleanup after the loop optimization passes.
1269   FPM.addPass(InstCombinePass());
1270 
1271   if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1272     ExtraVectorPassManager ExtraPasses;
1273     // At higher optimization levels, try to clean up any runtime overlap and
1274     // alignment checks inserted by the vectorizer. We want to track correlated
1275     // runtime checks for two inner loops in the same outer loop, fold any
1276     // common computations, hoist loop-invariant aspects out of any outer loop,
1277     // and unswitch the runtime checks if possible. Once hoisted, we may have
1278     // dead (or speculatable) control flows or more combining opportunities.
1279     ExtraPasses.addPass(EarlyCSEPass());
1280     ExtraPasses.addPass(CorrelatedValuePropagationPass());
1281     ExtraPasses.addPass(InstCombinePass());
1282     LoopPassManager LPM;
1283     LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1284                          /*AllowSpeculation=*/true));
1285     LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1286                                        OptimizationLevel::O3));
1287     ExtraPasses.addPass(
1288         createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1289                                         /*UseBlockFrequencyInfo=*/true));
1290     ExtraPasses.addPass(
1291         SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1292     ExtraPasses.addPass(InstCombinePass());
1293     FPM.addPass(std::move(ExtraPasses));
1294   }
1295 
1296   // Now that we've formed fast to execute loop structures, we do further
1297   // optimizations. These are run afterward as they might block doing complex
1298   // analyses and transforms such as what are needed for loop vectorization.
1299 
1300   // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1301   // GVN, loop transforms, and others have already run, so it's now better to
1302   // convert to more optimized IR using more aggressive simplify CFG options.
1303   // The extra sinking transform can create larger basic blocks, so do this
1304   // before SLP vectorization.
1305   FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1306                                   .forwardSwitchCondToPhi(true)
1307                                   .convertSwitchRangeToICmp(true)
1308                                   .convertSwitchToLookupTable(true)
1309                                   .needCanonicalLoops(false)
1310                                   .hoistCommonInsts(true)
1311                                   .sinkCommonInsts(true)));
1312 
1313   if (IsFullLTO) {
1314     FPM.addPass(SCCPPass());
1315     FPM.addPass(InstCombinePass());
1316     FPM.addPass(BDCEPass());
1317   }
1318 
1319   // Optimize parallel scalar instruction chains into SIMD instructions.
1320   if (PTO.SLPVectorization) {
1321     FPM.addPass(SLPVectorizerPass());
1322     if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1323       FPM.addPass(EarlyCSEPass());
1324     }
1325   }
1326   // Enhance/cleanup vector code.
1327   FPM.addPass(VectorCombinePass());
1328 
1329   if (!IsFullLTO) {
1330     FPM.addPass(InstCombinePass());
1331     // Unroll small loops to hide loop backedge latency and saturate any
1332     // parallel execution resources of an out-of-order processor. We also then
1333     // need to clean up redundancies and loop invariant code.
1334     // FIXME: It would be really good to use a loop-integrated instruction
1335     // combiner for cleanup here so that the unrolling and LICM can be pipelined
1336     // across the loop nests.
1337     // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1338     if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1339       FPM.addPass(createFunctionToLoopPassAdaptor(
1340           LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1341     }
1342     FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1343         Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1344         PTO.ForgetAllSCEVInLoopUnroll)));
1345     FPM.addPass(WarnMissedTransformationsPass());
1346     // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1347     // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1348     // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1349     // NOTE: we are very late in the pipeline, and we don't have any LICM
1350     // or SimplifyCFG passes scheduled after us, that would cleanup
1351     // the CFG mess this may created if allowed to modify CFG, so forbid that.
1352     FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1353   }
1354 
1355   if (EnableInferAlignmentPass)
1356     FPM.addPass(InferAlignmentPass());
1357   FPM.addPass(InstCombinePass());
1358 
1359   // This is needed for two reasons:
1360   //   1. It works around problems that instcombine introduces, such as sinking
1361   //      expensive FP divides into loops containing multiplications using the
1362   //      divide result.
1363   //   2. It helps to clean up some loop-invariant code created by the loop
1364   //      unroll pass when IsFullLTO=false.
1365   FPM.addPass(createFunctionToLoopPassAdaptor(
1366       LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1367                /*AllowSpeculation=*/true),
1368       /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1369 
1370   // Now that we've vectorized and unrolled loops, we may have more refined
1371   // alignment information, try to re-derive it here.
1372   FPM.addPass(AlignmentFromAssumptionsPass());
1373 }
1374 
1375 ModulePassManager
buildModuleOptimizationPipeline(OptimizationLevel Level,ThinOrFullLTOPhase LTOPhase)1376 PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1377                                              ThinOrFullLTOPhase LTOPhase) {
1378   const bool LTOPreLink = isLTOPreLink(LTOPhase);
1379   ModulePassManager MPM;
1380 
1381   // Run partial inlining pass to partially inline functions that have
1382   // large bodies.
1383   if (RunPartialInlining)
1384     MPM.addPass(PartialInlinerPass());
1385 
1386   // Remove avail extern fns and globals definitions since we aren't compiling
1387   // an object file for later LTO. For LTO we want to preserve these so they
1388   // are eligible for inlining at link-time. Note if they are unreferenced they
1389   // will be removed by GlobalDCE later, so this only impacts referenced
1390   // available externally globals. Eventually they will be suppressed during
1391   // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1392   // may make globals referenced by available external functions dead and saves
1393   // running remaining passes on the eliminated functions. These should be
1394   // preserved during prelinking for link-time inlining decisions.
1395   if (!LTOPreLink)
1396     MPM.addPass(EliminateAvailableExternallyPass());
1397 
1398   if (EnableOrderFileInstrumentation)
1399     MPM.addPass(InstrOrderFilePass());
1400 
1401   // Do RPO function attribute inference across the module to forward-propagate
1402   // attributes where applicable.
1403   // FIXME: Is this really an optimization rather than a canonicalization?
1404   MPM.addPass(ReversePostOrderFunctionAttrsPass());
1405 
1406   // Do a post inline PGO instrumentation and use pass. This is a context
1407   // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as
1408   // cross-module inline has not been done yet. The context sensitive
1409   // instrumentation is after all the inlines are done.
1410   if (!LTOPreLink && PGOOpt) {
1411     if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1412       addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1413                         /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1414                         PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1415                         PGOOpt->FS);
1416     else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1417       addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1418                         /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1419                         PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1420                         PGOOpt->FS);
1421   }
1422 
1423   // Re-compute GlobalsAA here prior to function passes. This is particularly
1424   // useful as the above will have inlined, DCE'ed, and function-attr
1425   // propagated everything. We should at this point have a reasonably minimal
1426   // and richly annotated call graph. By computing aliasing and mod/ref
1427   // information for all local globals here, the late loop passes and notably
1428   // the vectorizer will be able to use them to help recognize vectorizable
1429   // memory operations.
1430   if (EnableGlobalAnalyses)
1431     MPM.addPass(RecomputeGlobalsAAPass());
1432 
1433   invokeOptimizerEarlyEPCallbacks(MPM, Level);
1434 
1435   FunctionPassManager OptimizePM;
1436   // Scheduling LoopVersioningLICM when inlining is over, because after that
1437   // we may see more accurate aliasing. Reason to run this late is that too
1438   // early versioning may prevent further inlining due to increase of code
1439   // size. Other optimizations which runs later might get benefit of no-alias
1440   // assumption in clone loop.
1441   if (UseLoopVersioningLICM) {
1442     OptimizePM.addPass(
1443         createFunctionToLoopPassAdaptor(LoopVersioningLICMPass()));
1444     // LoopVersioningLICM pass might increase new LICM opportunities.
1445     OptimizePM.addPass(createFunctionToLoopPassAdaptor(
1446         LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1447                  /*AllowSpeculation=*/true),
1448         /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1449   }
1450 
1451   OptimizePM.addPass(Float2IntPass());
1452   OptimizePM.addPass(LowerConstantIntrinsicsPass());
1453 
1454   if (EnableMatrix) {
1455     OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1456     OptimizePM.addPass(EarlyCSEPass());
1457   }
1458 
1459   // CHR pass should only be applied with the profile information.
1460   // The check is to check the profile summary information in CHR.
1461   if (EnableCHR && Level == OptimizationLevel::O3)
1462     OptimizePM.addPass(ControlHeightReductionPass());
1463 
1464   // FIXME: We need to run some loop optimizations to re-rotate loops after
1465   // simplifycfg and others undo their rotation.
1466 
1467   // Optimize the loop execution. These passes operate on entire loop nests
1468   // rather than on each loop in an inside-out manner, and so they are actually
1469   // function passes.
1470 
1471   invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1472 
1473   LoopPassManager LPM;
1474   // First rotate loops that may have been un-rotated by prior passes.
1475   // Disable header duplication at -Oz.
1476   LPM.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
1477                                  Level != OptimizationLevel::Oz,
1478                              LTOPreLink));
1479   // Some loops may have become dead by now. Try to delete them.
1480   // FIXME: see discussion in https://reviews.llvm.org/D112851,
1481   //        this may need to be revisited once we run GVN before loop deletion
1482   //        in the simplification pipeline.
1483   LPM.addPass(LoopDeletionPass());
1484   OptimizePM.addPass(createFunctionToLoopPassAdaptor(
1485       std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1486 
1487   // Distribute loops to allow partial vectorization.  I.e. isolate dependences
1488   // into separate loop that would otherwise inhibit vectorization.  This is
1489   // currently only performed for loops marked with the metadata
1490   // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1491   OptimizePM.addPass(LoopDistributePass());
1492 
1493   // Populates the VFABI attribute with the scalar-to-vector mappings
1494   // from the TargetLibraryInfo.
1495   OptimizePM.addPass(InjectTLIMappings());
1496 
1497   addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1498 
1499   // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1500   // canonicalization pass that enables other optimizations. As a result,
1501   // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1502   // result too early.
1503   OptimizePM.addPass(LoopSinkPass());
1504 
1505   // And finally clean up LCSSA form before generating code.
1506   OptimizePM.addPass(InstSimplifyPass());
1507 
1508   // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1509   // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1510   // flattening of blocks.
1511   OptimizePM.addPass(DivRemPairsPass());
1512 
1513   // Try to annotate calls that were created during optimization.
1514   OptimizePM.addPass(TailCallElimPass());
1515 
1516   // LoopSink (and other loop passes since the last simplifyCFG) might have
1517   // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1518   OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1519                                          .convertSwitchRangeToICmp(true)
1520                                          .speculateUnpredictables(true)));
1521 
1522   // Add the core optimizing pipeline.
1523   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1524                                                 PTO.EagerlyInvalidateAnalyses));
1525 
1526   invokeOptimizerLastEPCallbacks(MPM, Level);
1527 
1528   // Split out cold code. Splitting is done late to avoid hiding context from
1529   // other optimizations and inadvertently regressing performance. The tradeoff
1530   // is that this has a higher code size cost than splitting early.
1531   if (EnableHotColdSplit && !LTOPreLink)
1532     MPM.addPass(HotColdSplittingPass());
1533 
1534   // Search the code for similar regions of code. If enough similar regions can
1535   // be found where extracting the regions into their own function will decrease
1536   // the size of the program, we extract the regions, a deduplicate the
1537   // structurally similar regions.
1538   if (EnableIROutliner)
1539     MPM.addPass(IROutlinerPass());
1540 
1541   // Now we need to do some global optimization transforms.
1542   // FIXME: It would seem like these should come first in the optimization
1543   // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1544   // ordering here.
1545   MPM.addPass(GlobalDCEPass());
1546   MPM.addPass(ConstantMergePass());
1547 
1548   // Merge functions if requested. It has a better chance to merge functions
1549   // after ConstantMerge folded jump tables.
1550   if (PTO.MergeFunctions)
1551     MPM.addPass(MergeFunctionsPass());
1552 
1553   if (PTO.CallGraphProfile && !LTOPreLink)
1554     MPM.addPass(CGProfilePass(LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
1555                               LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink));
1556 
1557   // TODO: Relative look table converter pass caused an issue when full lto is
1558   // enabled. See https://reviews.llvm.org/D94355 for more details.
1559   // Until the issue fixed, disable this pass during pre-linking phase.
1560   if (!LTOPreLink)
1561     MPM.addPass(RelLookupTableConverterPass());
1562 
1563   return MPM;
1564 }
1565 
1566 ModulePassManager
buildPerModuleDefaultPipeline(OptimizationLevel Level,bool LTOPreLink)1567 PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
1568                                            bool LTOPreLink) {
1569   if (Level == OptimizationLevel::O0)
1570     return buildO0DefaultPipeline(Level, LTOPreLink);
1571 
1572   ModulePassManager MPM;
1573 
1574   // Convert @llvm.global.annotations to !annotation metadata.
1575   MPM.addPass(Annotation2MetadataPass());
1576 
1577   // Force any function attributes we want the rest of the pipeline to observe.
1578   MPM.addPass(ForceFunctionAttrsPass());
1579 
1580   if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1581     MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
1582 
1583   // Apply module pipeline start EP callback.
1584   invokePipelineStartEPCallbacks(MPM, Level);
1585 
1586   const ThinOrFullLTOPhase LTOPhase = LTOPreLink
1587                                           ? ThinOrFullLTOPhase::FullLTOPreLink
1588                                           : ThinOrFullLTOPhase::None;
1589   // Add the core simplification pipeline.
1590   MPM.addPass(buildModuleSimplificationPipeline(Level, LTOPhase));
1591 
1592   // Now add the optimization pipeline.
1593   MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPhase));
1594 
1595   if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1596       PGOOpt->Action == PGOOptions::SampleUse)
1597     MPM.addPass(PseudoProbeUpdatePass());
1598 
1599   // Emit annotation remarks.
1600   addAnnotationRemarksPass(MPM);
1601 
1602   if (LTOPreLink)
1603     addRequiredLTOPreLinkPasses(MPM);
1604   return MPM;
1605 }
1606 
1607 ModulePassManager
buildFatLTODefaultPipeline(OptimizationLevel Level,bool ThinLTO,bool EmitSummary)1608 PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
1609                                         bool EmitSummary) {
1610   ModulePassManager MPM;
1611   if (ThinLTO)
1612     MPM.addPass(buildThinLTOPreLinkDefaultPipeline(Level));
1613   else
1614     MPM.addPass(buildLTOPreLinkDefaultPipeline(Level));
1615   MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1616 
1617   // Use the ThinLTO post-link pipeline with sample profiling
1618   if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1619     MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1620   else {
1621     // otherwise, just use module optimization
1622     MPM.addPass(
1623         buildModuleOptimizationPipeline(Level, ThinOrFullLTOPhase::None));
1624     // Emit annotation remarks.
1625     addAnnotationRemarksPass(MPM);
1626   }
1627   return MPM;
1628 }
1629 
1630 ModulePassManager
buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)1631 PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1632   if (Level == OptimizationLevel::O0)
1633     return buildO0DefaultPipeline(Level, /*LTOPreLink*/true);
1634 
1635   ModulePassManager MPM;
1636 
1637   // Convert @llvm.global.annotations to !annotation metadata.
1638   MPM.addPass(Annotation2MetadataPass());
1639 
1640   // Force any function attributes we want the rest of the pipeline to observe.
1641   MPM.addPass(ForceFunctionAttrsPass());
1642 
1643   if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1644     MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
1645 
1646   // Apply module pipeline start EP callback.
1647   invokePipelineStartEPCallbacks(MPM, Level);
1648 
1649   // If we are planning to perform ThinLTO later, we don't bloat the code with
1650   // unrolling/vectorization/... now. Just simplify the module as much as we
1651   // can.
1652   MPM.addPass(buildModuleSimplificationPipeline(
1653       Level, ThinOrFullLTOPhase::ThinLTOPreLink));
1654 
1655   // Run partial inlining pass to partially inline functions that have
1656   // large bodies.
1657   // FIXME: It isn't clear whether this is really the right place to run this
1658   // in ThinLTO. Because there is another canonicalization and simplification
1659   // phase that will run after the thin link, running this here ends up with
1660   // less information than will be available later and it may grow functions in
1661   // ways that aren't beneficial.
1662   if (RunPartialInlining)
1663     MPM.addPass(PartialInlinerPass());
1664 
1665   if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1666       PGOOpt->Action == PGOOptions::SampleUse)
1667     MPM.addPass(PseudoProbeUpdatePass());
1668 
1669   // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1670   // optimization is going to be done in PostLink stage, but clang can't add
1671   // callbacks there in case of in-process ThinLTO called by linker.
1672   invokeOptimizerEarlyEPCallbacks(MPM, Level);
1673   invokeOptimizerLastEPCallbacks(MPM, Level);
1674 
1675   // Emit annotation remarks.
1676   addAnnotationRemarksPass(MPM);
1677 
1678   addRequiredLTOPreLinkPasses(MPM);
1679 
1680   return MPM;
1681 }
1682 
buildThinLTODefaultPipeline(OptimizationLevel Level,const ModuleSummaryIndex * ImportSummary)1683 ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
1684     OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1685   ModulePassManager MPM;
1686 
1687   if (ImportSummary) {
1688     // For ThinLTO we must apply the context disambiguation decisions early, to
1689     // ensure we can correctly match the callsites to summary data.
1690     if (EnableMemProfContextDisambiguation)
1691       MPM.addPass(MemProfContextDisambiguation(ImportSummary));
1692 
1693     // These passes import type identifier resolutions for whole-program
1694     // devirtualization and CFI. They must run early because other passes may
1695     // disturb the specific instruction patterns that these passes look for,
1696     // creating dependencies on resolutions that may not appear in the summary.
1697     //
1698     // For example, GVN may transform the pattern assume(type.test) appearing in
1699     // two basic blocks into assume(phi(type.test, type.test)), which would
1700     // transform a dependency on a WPD resolution into a dependency on a type
1701     // identifier resolution for CFI.
1702     //
1703     // Also, WPD has access to more precise information than ICP and can
1704     // devirtualize more effectively, so it should operate on the IR first.
1705     //
1706     // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1707     // metadata and intrinsics.
1708     MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1709     MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1710   }
1711 
1712   if (Level == OptimizationLevel::O0) {
1713     // Run a second time to clean up any type tests left behind by WPD for use
1714     // in ICP.
1715     MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1716     // Drop available_externally and unreferenced globals. This is necessary
1717     // with ThinLTO in order to avoid leaving undefined references to dead
1718     // globals in the object file.
1719     MPM.addPass(EliminateAvailableExternallyPass());
1720     MPM.addPass(GlobalDCEPass());
1721     return MPM;
1722   }
1723 
1724   // Add the core simplification pipeline.
1725   MPM.addPass(buildModuleSimplificationPipeline(
1726       Level, ThinOrFullLTOPhase::ThinLTOPostLink));
1727 
1728   // Now add the optimization pipeline.
1729   MPM.addPass(buildModuleOptimizationPipeline(
1730       Level, ThinOrFullLTOPhase::ThinLTOPostLink));
1731 
1732   // Emit annotation remarks.
1733   addAnnotationRemarksPass(MPM);
1734 
1735   return MPM;
1736 }
1737 
1738 ModulePassManager
buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)1739 PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1740   // FIXME: We should use a customized pre-link pipeline!
1741   return buildPerModuleDefaultPipeline(Level,
1742                                        /* LTOPreLink */ true);
1743 }
1744 
1745 ModulePassManager
buildLTODefaultPipeline(OptimizationLevel Level,ModuleSummaryIndex * ExportSummary)1746 PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
1747                                      ModuleSummaryIndex *ExportSummary) {
1748   ModulePassManager MPM;
1749 
1750   invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level);
1751 
1752   // Create a function that performs CFI checks for cross-DSO calls with targets
1753   // in the current module.
1754   MPM.addPass(CrossDSOCFIPass());
1755 
1756   if (Level == OptimizationLevel::O0) {
1757     // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1758     // metadata and intrinsics.
1759     MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1760     MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1761     // Run a second time to clean up any type tests left behind by WPD for use
1762     // in ICP.
1763     MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1764 
1765     invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
1766 
1767     // Emit annotation remarks.
1768     addAnnotationRemarksPass(MPM);
1769 
1770     return MPM;
1771   }
1772 
1773   if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1774     // Load sample profile before running the LTO optimization pipeline.
1775     MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1776                                         PGOOpt->ProfileRemappingFile,
1777                                         ThinOrFullLTOPhase::FullLTOPostLink));
1778     // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1779     // RequireAnalysisPass for PSI before subsequent non-module passes.
1780     MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1781   }
1782 
1783   // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1784   MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
1785 
1786   // Remove unused virtual tables to improve the quality of code generated by
1787   // whole-program devirtualization and bitset lowering.
1788   MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1789 
1790   // Do basic inference of function attributes from known properties of system
1791   // libraries and other oracles.
1792   MPM.addPass(InferFunctionAttrsPass());
1793 
1794   if (Level.getSpeedupLevel() > 1) {
1795     MPM.addPass(createModuleToFunctionPassAdaptor(
1796         CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
1797 
1798     // Indirect call promotion. This should promote all the targets that are
1799     // left by the earlier promotion pass that promotes intra-module targets.
1800     // This two-step promotion is to save the compile time. For LTO, it should
1801     // produce the same result as if we only do promotion here.
1802     MPM.addPass(PGOIndirectCallPromotion(
1803         true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1804 
1805     // Propagate constants at call sites into the functions they call.  This
1806     // opens opportunities for globalopt (and inlining) by substituting function
1807     // pointers passed as arguments to direct uses of functions.
1808     MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1809                                          Level != OptimizationLevel::Os &&
1810                                          Level != OptimizationLevel::Oz)));
1811 
1812     // Attach metadata to indirect call sites indicating the set of functions
1813     // they may target at run-time. This should follow IPSCCP.
1814     MPM.addPass(CalledValuePropagationPass());
1815   }
1816 
1817   // Now deduce any function attributes based in the current code.
1818   MPM.addPass(
1819       createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
1820 
1821   // Do RPO function attribute inference across the module to forward-propagate
1822   // attributes where applicable.
1823   // FIXME: Is this really an optimization rather than a canonicalization?
1824   MPM.addPass(ReversePostOrderFunctionAttrsPass());
1825 
1826   // Use in-range annotations on GEP indices to split globals where beneficial.
1827   MPM.addPass(GlobalSplitPass());
1828 
1829   // Run whole program optimization of virtual call when the list of callees
1830   // is fixed.
1831   MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1832 
1833   // Stop here at -O1.
1834   if (Level == OptimizationLevel::O1) {
1835     // The LowerTypeTestsPass needs to run to lower type metadata and the
1836     // type.test intrinsics. The pass does nothing if CFI is disabled.
1837     MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1838     // Run a second time to clean up any type tests left behind by WPD for use
1839     // in ICP (which is performed earlier than this in the regular LTO
1840     // pipeline).
1841     MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1842 
1843     invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
1844 
1845     // Emit annotation remarks.
1846     addAnnotationRemarksPass(MPM);
1847 
1848     return MPM;
1849   }
1850 
1851   // Optimize globals to try and fold them into constants.
1852   MPM.addPass(GlobalOptPass());
1853 
1854   // Promote any localized globals to SSA registers.
1855   MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
1856 
1857   // Linking modules together can lead to duplicate global constant, only
1858   // keep one copy of each constant.
1859   MPM.addPass(ConstantMergePass());
1860 
1861   // Remove unused arguments from functions.
1862   MPM.addPass(DeadArgumentEliminationPass());
1863 
1864   // Reduce the code after globalopt and ipsccp.  Both can open up significant
1865   // simplification opportunities, and both can propagate functions through
1866   // function pointers.  When this happens, we often have to resolve varargs
1867   // calls, etc, so let instcombine do this.
1868   FunctionPassManager PeepholeFPM;
1869   PeepholeFPM.addPass(InstCombinePass());
1870   if (Level.getSpeedupLevel() > 1)
1871     PeepholeFPM.addPass(AggressiveInstCombinePass());
1872   invokePeepholeEPCallbacks(PeepholeFPM, Level);
1873 
1874   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
1875                                                 PTO.EagerlyInvalidateAnalyses));
1876 
1877   // Note: historically, the PruneEH pass was run first to deduce nounwind and
1878   // generally clean up exception handling overhead. It isn't clear this is
1879   // valuable as the inliner doesn't currently care whether it is inlining an
1880   // invoke or a call.
1881   // Run the inliner now.
1882   if (EnableModuleInliner) {
1883     MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
1884                                   UseInlineAdvisor,
1885                                   ThinOrFullLTOPhase::FullLTOPostLink));
1886   } else {
1887     MPM.addPass(ModuleInlinerWrapperPass(
1888         getInlineParamsFromOptLevel(Level),
1889         /* MandatoryFirst */ true,
1890         InlineContext{ThinOrFullLTOPhase::FullLTOPostLink,
1891                       InlinePass::CGSCCInliner}));
1892   }
1893 
1894   // Perform context disambiguation after inlining, since that would reduce the
1895   // amount of additional cloning required to distinguish the allocation
1896   // contexts.
1897   if (EnableMemProfContextDisambiguation)
1898     MPM.addPass(MemProfContextDisambiguation());
1899 
1900   // Optimize globals again after we ran the inliner.
1901   MPM.addPass(GlobalOptPass());
1902 
1903   // Run the OpenMPOpt pass again after global optimizations.
1904   MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
1905 
1906   // Garbage collect dead functions.
1907   MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1908 
1909   // If we didn't decide to inline a function, check to see if we can
1910   // transform it to pass arguments by value instead of by reference.
1911   MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));
1912 
1913   FunctionPassManager FPM;
1914   // The IPO Passes may leave cruft around. Clean up after them.
1915   FPM.addPass(InstCombinePass());
1916   invokePeepholeEPCallbacks(FPM, Level);
1917 
1918   if (EnableConstraintElimination)
1919     FPM.addPass(ConstraintEliminationPass());
1920 
1921   FPM.addPass(JumpThreadingPass());
1922 
1923   // Do a post inline PGO instrumentation and use pass. This is a context
1924   // sensitive PGO pass.
1925   if (PGOOpt) {
1926     if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1927       addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1928                         /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1929                         PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1930                         PGOOpt->FS);
1931     else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1932       addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1933                         /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1934                         PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1935                         PGOOpt->FS);
1936   }
1937 
1938   // Break up allocas
1939   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
1940 
1941   // LTO provides additional opportunities for tailcall elimination due to
1942   // link-time inlining, and visibility of nocapture attribute.
1943   FPM.addPass(TailCallElimPass());
1944 
1945   // Run a few AA driver optimizations here and now to cleanup the code.
1946   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
1947                                                 PTO.EagerlyInvalidateAnalyses));
1948 
1949   MPM.addPass(
1950       createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
1951 
1952   // Require the GlobalsAA analysis for the module so we can query it within
1953   // MainFPM.
1954   if (EnableGlobalAnalyses) {
1955     MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
1956     // Invalidate AAManager so it can be recreated and pick up the newly
1957     // available GlobalsAA.
1958     MPM.addPass(
1959         createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
1960   }
1961 
1962   FunctionPassManager MainFPM;
1963   MainFPM.addPass(createFunctionToLoopPassAdaptor(
1964       LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1965                /*AllowSpeculation=*/true),
1966       /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1967 
1968   if (RunNewGVN)
1969     MainFPM.addPass(NewGVNPass());
1970   else
1971     MainFPM.addPass(GVNPass());
1972 
1973   // Remove dead memcpy()'s.
1974   MainFPM.addPass(MemCpyOptPass());
1975 
1976   // Nuke dead stores.
1977   MainFPM.addPass(DSEPass());
1978   MainFPM.addPass(MoveAutoInitPass());
1979   MainFPM.addPass(MergedLoadStoreMotionPass());
1980 
1981   LoopPassManager LPM;
1982   if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
1983     LPM.addPass(LoopFlattenPass());
1984   LPM.addPass(IndVarSimplifyPass());
1985   LPM.addPass(LoopDeletionPass());
1986   // FIXME: Add loop interchange.
1987 
1988   // Unroll small loops and perform peeling.
1989   LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
1990                                  /* OnlyWhenForced= */ !PTO.LoopUnrolling,
1991                                  PTO.ForgetAllSCEVInLoopUnroll));
1992   // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
1993   // *All* loop passes must preserve it, in order to be able to use it.
1994   MainFPM.addPass(createFunctionToLoopPassAdaptor(
1995       std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
1996 
1997   MainFPM.addPass(LoopDistributePass());
1998 
1999   addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
2000 
2001   // Run the OpenMPOpt CGSCC pass again late.
2002   MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
2003       OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));
2004 
2005   invokePeepholeEPCallbacks(MainFPM, Level);
2006   MainFPM.addPass(JumpThreadingPass());
2007   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2008                                                 PTO.EagerlyInvalidateAnalyses));
2009 
2010   // Lower type metadata and the type.test intrinsic. This pass supports
2011   // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2012   // to be run at link time if CFI is enabled. This pass does nothing if
2013   // CFI is disabled.
2014   MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2015   // Run a second time to clean up any type tests left behind by WPD for use
2016   // in ICP (which is performed earlier than this in the regular LTO pipeline).
2017   MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
2018 
2019   // Enable splitting late in the FullLTO post-link pipeline.
2020   if (EnableHotColdSplit)
2021     MPM.addPass(HotColdSplittingPass());
2022 
2023   // Add late LTO optimization passes.
2024   FunctionPassManager LateFPM;
2025 
2026   // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2027   // canonicalization pass that enables other optimizations. As a result,
2028   // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2029   // result too early.
2030   LateFPM.addPass(LoopSinkPass());
2031 
2032   // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2033   // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2034   // flattening of blocks.
2035   LateFPM.addPass(DivRemPairsPass());
2036 
2037   // Delete basic blocks, which optimization passes may have killed.
2038   LateFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
2039                                       .convertSwitchRangeToICmp(true)
2040                                       .hoistCommonInsts(true)
2041                                       .speculateUnpredictables(true)));
2042   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2043 
  // Drop bodies of available externally objects to improve GlobalDCE.
2045   MPM.addPass(EliminateAvailableExternallyPass());
2046 
2047   // Now that we have optimized the program, discard unreachable functions.
2048   MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2049 
2050   if (PTO.MergeFunctions)
2051     MPM.addPass(MergeFunctionsPass());
2052 
2053   if (PTO.CallGraphProfile)
2054     MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2055 
2056   invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
2057 
2058   // Emit annotation remarks.
2059   addAnnotationRemarksPass(MPM);
2060 
2061   return MPM;
2062 }
2063 
buildO0DefaultPipeline(OptimizationLevel Level,bool LTOPreLink)2064 ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
2065                                                       bool LTOPreLink) {
2066   assert(Level == OptimizationLevel::O0 &&
2067          "buildO0DefaultPipeline should only be used with O0");
2068 
2069   ModulePassManager MPM;
2070 
2071   // Perform pseudo probe instrumentation in O0 mode. This is for the
2072   // consistency between different build modes. For example, a LTO build can be
2073   // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2074   // the postlink will require pseudo probe instrumentation in the prelink.
2075   if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2076     MPM.addPass(SampleProfileProbePass(TM));
2077 
2078   if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2079                  PGOOpt->Action == PGOOptions::IRUse))
2080     addPGOInstrPassesForO0(
2081         MPM,
2082         /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2083         /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2084         PGOOpt->ProfileRemappingFile, PGOOpt->FS);
2085 
2086   // Instrument function entry and exit before all inlining.
2087   MPM.addPass(createModuleToFunctionPassAdaptor(
2088       EntryExitInstrumenterPass(/*PostInlining=*/false)));
2089 
2090   invokePipelineStartEPCallbacks(MPM, Level);
2091 
2092   if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2093     MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
2094 
2095   invokePipelineEarlySimplificationEPCallbacks(MPM, Level);
2096 
2097   // Build a minimal pipeline based on the semantics required by LLVM,
2098   // which is just that always inlining occurs. Further, disable generating
2099   // lifetime intrinsics to avoid enabling further optimizations during
2100   // code generation.
2101   MPM.addPass(AlwaysInlinerPass(
2102       /*InsertLifetimeIntrinsics=*/false));
2103 
2104   if (PTO.MergeFunctions)
2105     MPM.addPass(MergeFunctionsPass());
2106 
2107   if (EnableMatrix)
2108     MPM.addPass(
2109         createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
2110 
2111   if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2112     CGSCCPassManager CGPM;
2113     invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);
2114     if (!CGPM.isEmpty())
2115       MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
2116   }
2117   if (!LateLoopOptimizationsEPCallbacks.empty()) {
2118     LoopPassManager LPM;
2119     invokeLateLoopOptimizationsEPCallbacks(LPM, Level);
2120     if (!LPM.isEmpty()) {
2121       MPM.addPass(createModuleToFunctionPassAdaptor(
2122           createFunctionToLoopPassAdaptor(std::move(LPM))));
2123     }
2124   }
2125   if (!LoopOptimizerEndEPCallbacks.empty()) {
2126     LoopPassManager LPM;
2127     invokeLoopOptimizerEndEPCallbacks(LPM, Level);
2128     if (!LPM.isEmpty()) {
2129       MPM.addPass(createModuleToFunctionPassAdaptor(
2130           createFunctionToLoopPassAdaptor(std::move(LPM))));
2131     }
2132   }
2133   if (!ScalarOptimizerLateEPCallbacks.empty()) {
2134     FunctionPassManager FPM;
2135     invokeScalarOptimizerLateEPCallbacks(FPM, Level);
2136     if (!FPM.isEmpty())
2137       MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2138   }
2139 
2140   invokeOptimizerEarlyEPCallbacks(MPM, Level);
2141 
2142   if (!VectorizerStartEPCallbacks.empty()) {
2143     FunctionPassManager FPM;
2144     invokeVectorizerStartEPCallbacks(FPM, Level);
2145     if (!FPM.isEmpty())
2146       MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2147   }
2148 
2149   ModulePassManager CoroPM;
2150   CoroPM.addPass(CoroEarlyPass());
2151   CGSCCPassManager CGPM;
2152   CGPM.addPass(CoroSplitPass());
2153   CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
2154   CoroPM.addPass(CoroCleanupPass());
2155   CoroPM.addPass(GlobalDCEPass());
2156   MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
2157 
2158   invokeOptimizerLastEPCallbacks(MPM, Level);
2159 
2160   if (LTOPreLink)
2161     addRequiredLTOPreLinkPasses(MPM);
2162 
2163   MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
2164 
2165   return MPM;
2166 }
2167 
buildDefaultAAPipeline()2168 AAManager PassBuilder::buildDefaultAAPipeline() {
2169   AAManager AA;
2170 
2171   // The order in which these are registered determines their priority when
2172   // being queried.
2173 
2174   // First we register the basic alias analysis that provides the majority of
2175   // per-function local AA logic. This is a stateless, on-demand local set of
2176   // AA techniques.
2177   AA.registerFunctionAnalysis<BasicAA>();
2178 
2179   // Next we query fast, specialized alias analyses that wrap IR-embedded
2180   // information about aliasing.
2181   AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2182   AA.registerFunctionAnalysis<TypeBasedAA>();
2183 
2184   // Add support for querying global aliasing information when available.
2185   // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2186   // analysis, all that the `AAManager` can do is query for any *cached*
2187   // results from `GlobalsAA` through a readonly proxy.
2188   if (EnableGlobalAnalyses)
2189     AA.registerModuleAnalysis<GlobalsAA>();
2190 
2191   // Add target-specific alias analyses.
2192   if (TM)
2193     TM->registerDefaultAliasAnalyses(AA);
2194 
2195   return AA;
2196 }
2197