1 //===- FunctionSpecialization.h - Function Specialization -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Overview:
10 // ---------
11 // Function Specialization is a transformation which propagates the constant
12 // parameters of a function call from the caller to the callee. It is part of
13 // the Inter-Procedural Sparse Conditional Constant Propagation (IPSCCP) pass.
14 // The transformation runs iteratively a number of times which is controlled
15 // by the option `funcspec-max-iters`. Running it multiple times is needed
16 // for specializing recursive functions, but also exposes new opportunities
17 // arising from specializations which return constant values or contain calls
18 // which can be specialized.
19 //
20 // Function Specialization supports propagating constant parameters like
21 // function pointers, literal constants and addresses of global variables.
22 // By propagating function pointers, indirect calls become direct calls. This
23 // exposes inlining opportunities which we would have otherwise missed. That's
24 // why function specialization is run before the inliner in the optimization
25 // pipeline; that is by design.
26 //
27 // Cost Model:
28 // -----------
29 // The cost model facilitates a utility for estimating the specialization bonus
30 // from propagating a constant argument. This is the InstCostVisitor, a class
31 // that inherits from the InstVisitor. The bonus itself is expressed as codesize
32 // and latency savings. Codesize savings means the amount of code that becomes
33 // dead in the specialization from propagating the constant, whereas latency
34 // savings represents the cycles we are saving from replacing instructions with
35 // constant values. The InstCostVisitor overrides a set of `visit*` methods to
36 // be able to handle different types of instructions. These attempt to constant-
37 // fold the instruction in which case a constant is returned and propagated
38 // further.
39 //
40 // Function pointers are not handled by the InstCostVisitor. They are treated
41 // separately as they could expose inlining opportunities via indirect call
42 // promotion. The inlining bonus contributes to the total specialization score.
43 //
44 // For a specialization to be profitable its bonus needs to exceed a minimum
45 // threshold. There are three options for controlling the threshold which are
46 // expressed as percentages of the original function size:
47 //  * funcspec-min-codesize-savings
48 //  * funcspec-min-latency-savings
49 //  * funcspec-min-inlining-bonus
50 // There's also an option for controlling the codesize growth from recursive
51 // specializations. That is `funcspec-max-codesize-growth`.
52 //
53 // Once we have all the potential specializations with their score we need to
54 // choose the best ones, which fit in the module specialization budget. That
55 // is controlled by the option `funcspec-max-clones`. To find the best `NSpec`
56 // specializations we use a max-heap. For more details refer to D139346.
57 //
58 // Ideas:
59 // ------
60 // - With a function specialization attribute for arguments, we could have
61 //   a direct way to steer function specialization, avoiding the cost-model,
62 //   and thus control compile-times / code-size.
63 //
64 // - Perhaps a post-inlining function specialization pass could be more
65 //   aggressive on literal constants.
66 //
67 // References:
68 // -----------
69 // 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable
70 // it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q
71 //
72 //===----------------------------------------------------------------------===//
73 
74 #ifndef LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
75 #define LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
76 
77 #include "llvm/Analysis/BlockFrequencyInfo.h"
78 #include "llvm/Analysis/CodeMetrics.h"
79 #include "llvm/Analysis/InlineCost.h"
80 #include "llvm/Analysis/TargetTransformInfo.h"
81 #include "llvm/IR/InstVisitor.h"
82 #include "llvm/Transforms/Scalar/SCCP.h"
83 #include "llvm/Transforms/Utils/Cloning.h"
84 #include "llvm/Transforms/Utils/SCCPSolver.h"
85 #include "llvm/Transforms/Utils/SizeOpts.h"
86 
87 namespace llvm {
88 // Map of potential specializations for each function. The FunctionSpecializer
89 // keeps the discovered specialisation opportunities for the module in a single
90 // vector, where the specialisations of each function form a contiguous range.
91 // This map's value is the beginning and the end of that range.
92 using SpecMap = DenseMap<Function *, std::pair<unsigned, unsigned>>;
93 
94 // Just a shorter abbreviation to improve indentation.
95 using Cost = InstructionCost;
96 
97 // Map of known constants found during the specialization bonus estimation.
98 using ConstMap = DenseMap<Value *, Constant *>;
99 
100 // Specialization signature, used to uniquely designate a specialization within
101 // a function.
102 struct SpecSig {
103   // Hashing support, used to distinguish between ordinary, empty, or tombstone
104   // keys.
105   unsigned Key = 0;
106   SmallVector<ArgInfo, 4> Args;
107 
108   bool operator==(const SpecSig &Other) const {
109     if (Key != Other.Key)
110       return false;
111     return Args == Other.Args;
112   }
113 
114   friend hash_code hash_value(const SpecSig &S) {
115     return hash_combine(hash_value(S.Key),
116                         hash_combine_range(S.Args.begin(), S.Args.end()));
117   }
118 };
119 
120 // Specialization instance.
121 struct Spec {
122   // Original function.
123   Function *F;
124 
125   // Cloned function, a specialized version of the original one.
126   Function *Clone = nullptr;
127 
128   // Specialization signature.
129   SpecSig Sig;
130 
131   // Profitability of the specialization.
132   unsigned Score;
133 
134   // List of call sites, matching this specialization.
135   SmallVector<CallBase *> CallSites;
136 
137   Spec(Function *F, const SpecSig &S, unsigned Score)
138       : F(F), Sig(S), Score(Score) {}
139   Spec(Function *F, const SpecSig &&S, unsigned Score)
140       : F(F), Sig(S), Score(Score) {}
141 };
142 
143 struct Bonus {
144   unsigned CodeSize = 0;
145   unsigned Latency = 0;
146 
147   Bonus() = default;
148 
149   Bonus(Cost CodeSize, Cost Latency) {
150     int64_t Sz = *CodeSize.getValue();
151     int64_t Ltc = *Latency.getValue();
152 
153     assert(Sz >= 0 && Ltc >= 0 && "CodeSize and Latency cannot be negative");
154     // It is safe to down cast since we know the arguments
155     // cannot be negative and Cost is of type int64_t.
156     this->CodeSize = static_cast<unsigned>(Sz);
157     this->Latency = static_cast<unsigned>(Ltc);
158   }
159 
160   Bonus &operator+=(const Bonus RHS) {
161     CodeSize += RHS.CodeSize;
162     Latency += RHS.Latency;
163     return *this;
164   }
165 
166   Bonus operator+(const Bonus RHS) const {
167     return Bonus(CodeSize + RHS.CodeSize, Latency + RHS.Latency);
168   }
169 
170   bool operator==(const Bonus RHS) const {
171     return CodeSize == RHS.CodeSize && Latency == RHS.Latency;
172   }
173 };
174 
175 class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
176   const DataLayout &DL;
177   BlockFrequencyInfo &BFI;
178   TargetTransformInfo &TTI;
179   SCCPSolver &Solver;
180 
181   ConstMap KnownConstants;
182   // Basic blocks known to be unreachable after constant propagation.
183   DenseSet<BasicBlock *> DeadBlocks;
184   // PHI nodes we have visited before.
185   DenseSet<Instruction *> VisitedPHIs;
186   // PHI nodes we have visited once without successfully constant folding them.
187   // Once the InstCostVisitor has processed all the specialization arguments,
188   // it should be possible to determine whether those PHIs can be folded
189   // (some of their incoming values may have become constant or dead).
190   SmallVector<Instruction *> PendingPHIs;
191 
192   ConstMap::iterator LastVisited;
193 
194 public:
195   InstCostVisitor(const DataLayout &DL, BlockFrequencyInfo &BFI,
196                   TargetTransformInfo &TTI, SCCPSolver &Solver)
197       : DL(DL), BFI(BFI), TTI(TTI), Solver(Solver) {}
198 
199   bool isBlockExecutable(BasicBlock *BB) {
200     return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(BB);
201   }
202 
203   Bonus getSpecializationBonus(Argument *A, Constant *C);
204 
205   Bonus getBonusFromPendingPHIs();
206 
207 private:
208   friend class InstVisitor<InstCostVisitor, Constant *>;
209 
210   static bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ,
211                                     DenseSet<BasicBlock *> &DeadBlocks);
212 
213   Bonus getUserBonus(Instruction *User, Value *Use = nullptr,
214                      Constant *C = nullptr);
215 
216   Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList);
217   Cost estimateSwitchInst(SwitchInst &I);
218   Cost estimateBranchInst(BranchInst &I);
219 
220   // Transitively Incoming Values (TIV) is a set of Values that can "feed" a
221   // value to the initial PHI-node. It is defined like this:
222   //
223   // * the initial PHI-node belongs to TIV.
224   //
225   // * for every PHI-node in TIV, its operands belong to TIV
226   //
227   // If TIV for the initial PHI-node (P) contains more than one constant or a
228   // value that is not a PHI-node, then P cannot be folded to a constant.
229   //
230   // As soon as we detect these cases, we bail, without constructing the
231   // full TIV.
232   // Otherwise P can be folded to the one constant in TIV.
233   bool discoverTransitivelyIncomingValues(Constant *Const, PHINode *Root,
234                                           DenseSet<PHINode *> &TransitivePHIs);
235 
236   Constant *visitInstruction(Instruction &I) { return nullptr; }
237   Constant *visitPHINode(PHINode &I);
238   Constant *visitFreezeInst(FreezeInst &I);
239   Constant *visitCallBase(CallBase &I);
240   Constant *visitLoadInst(LoadInst &I);
241   Constant *visitGetElementPtrInst(GetElementPtrInst &I);
242   Constant *visitSelectInst(SelectInst &I);
243   Constant *visitCastInst(CastInst &I);
244   Constant *visitCmpInst(CmpInst &I);
245   Constant *visitUnaryOperator(UnaryOperator &I);
246   Constant *visitBinaryOperator(BinaryOperator &I);
247 };
248 
249 class FunctionSpecializer {
250 
251   /// The IPSCCP Solver.
252   SCCPSolver &Solver;
253 
254   Module &M;
255 
256   /// Analysis manager, needed to invalidate analyses.
257   FunctionAnalysisManager *FAM;
258 
259   /// Analyses used to help determine if a function should be specialized.
260   std::function<BlockFrequencyInfo &(Function &)> GetBFI;
261   std::function<const TargetLibraryInfo &(Function &)> GetTLI;
262   std::function<TargetTransformInfo &(Function &)> GetTTI;
263   std::function<AssumptionCache &(Function &)> GetAC;
264 
265   SmallPtrSet<Function *, 32> Specializations;
266   SmallPtrSet<Function *, 32> FullySpecialized;
267   DenseMap<Function *, CodeMetrics> FunctionMetrics;
268   DenseMap<Function *, unsigned> FunctionGrowth;
269   unsigned NGlobals = 0;
270 
271 public:
272   FunctionSpecializer(
273       SCCPSolver &Solver, Module &M, FunctionAnalysisManager *FAM,
274       std::function<BlockFrequencyInfo &(Function &)> GetBFI,
275       std::function<const TargetLibraryInfo &(Function &)> GetTLI,
276       std::function<TargetTransformInfo &(Function &)> GetTTI,
277       std::function<AssumptionCache &(Function &)> GetAC)
278       : Solver(Solver), M(M), FAM(FAM), GetBFI(GetBFI), GetTLI(GetTLI),
279         GetTTI(GetTTI), GetAC(GetAC) {}
280 
281   ~FunctionSpecializer();
282 
283   bool run();
284 
285   InstCostVisitor getInstCostVisitorFor(Function *F) {
286     auto &BFI = GetBFI(*F);
287     auto &TTI = GetTTI(*F);
288     return InstCostVisitor(M.getDataLayout(), BFI, TTI, Solver);
289   }
290 
291 private:
292   Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call);
293 
294   /// A constant stack value is an AllocaInst that has a single constant
295   /// value stored to it. Return this constant if such an alloca stack value
296   /// is a function argument.
297   Constant *getConstantStackValue(CallInst *Call, Value *Val);
298 
299   /// See if there are any new constant values for the callers of \p F via
300   /// stack variables and promote them to global variables.
301   void promoteConstantStackValues(Function *F);
302 
303   /// Clean up fully specialized functions.
304   void removeDeadFunctions();
305 
306   /// Remove any ssa_copy intrinsics that may have been introduced.
307   void cleanUpSSA();
308 
309   /// @brief  Find potential specialization opportunities.
310   /// @param F Function to specialize
311   /// @param FuncSize Cost of specializing a function.
312   /// @param AllSpecs A vector to add potential specializations to.
313   /// @param SM  A map for a function's specialisation range
314   /// @return True, if any potential specializations were found
315   bool findSpecializations(Function *F, unsigned FuncSize,
316                            SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);
317 
318   /// Compute the inlining bonus for replacing argument \p A with constant \p C.
319   unsigned getInliningBonus(Argument *A, Constant *C);
320 
321   bool isCandidateFunction(Function *F);
322 
323   /// @brief Create a specialization of \p F and prime the SCCPSolver
324   /// @param F Function to specialize
325   /// @param S Which specialization to create
326   /// @return The new, cloned function
327   Function *createSpecialization(Function *F, const SpecSig &S);
328 
329   /// Determine if it is possible to specialise the function for constant values
330   /// of the formal parameter \p A.
331   bool isArgumentInteresting(Argument *A);
332 
333   /// Check if the value \p V  (an actual argument) is a constant or can only
334   /// have a constant value. Return that constant.
335   Constant *getCandidateConstant(Value *V);
336 
337   /// @brief Find and update calls to \p F, which match a specialization
338   /// @param F Orginal function
339   /// @param Begin Start of a range of possibly matching specialisations
340   /// @param End End of a range (exclusive) of possibly matching specialisations
341   void updateCallSites(Function *F, const Spec *Begin, const Spec *End);
342 };
343 } // namespace llvm
344 
345 #endif // LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
346