1 //===- IROutliner.h - Extract similar IR regions into functions ------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // The interface file for the IROutliner which is used by the IROutliner Pass.
11 //
12 // The outliner uses the IRSimilarityIdentifier to identify the similar regions
13 // of code.  It evaluates each set of IRSimilarityCandidates with an estimate of
14 // whether it will provide code size reduction.  Each region is extracted using
15 // the code extractor.  These extracted functions are consolidated into a single
16 // function and called from the extracted call site.
17 //
18 // For example:
19 // \code
20 //   %1 = add i32 %a, %b
21 //   %2 = add i32 %b, %a
22 //   %3 = add i32 %b, %a
23 //   %4 = add i32 %a, %b
24 // \endcode
25 // would become function
26 // \code
27 // define internal void outlined_ir_function(i32 %0, i32 %1) {
28 //   %1 = add i32 %0, %1
29 //   %2 = add i32 %1, %0
30 //   ret void
31 // }
32 // \endcode
33 // with calls:
34 // \code
35 //   call void outlined_ir_function(i32 %a, i32 %b)
36 //   call void outlined_ir_function(i32 %b, i32 %a)
37 // \endcode
38 //
39 //===----------------------------------------------------------------------===//
40 
41 #ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H
42 #define LLVM_TRANSFORMS_IPO_IROUTLINER_H
43 
44 #include "llvm/Analysis/IRSimilarityIdentifier.h"
45 #include "llvm/IR/PassManager.h"
46 #include "llvm/IR/ValueMap.h"
47 #include "llvm/Support/InstructionCost.h"
48 #include "llvm/Transforms/Utils/CodeExtractor.h"
49 #include <set>
50 
51 struct OutlinableGroup;
52 
53 namespace llvm {
54 using namespace IRSimilarity;
55 
56 class Module;
57 class TargetTransformInfo;
58 class OptimizationRemarkEmitter;
59 
60 /// The OutlinableRegion holds all the information for a specific region, or
61 /// sequence of instructions. This includes what values need to be hoisted to
62 /// arguments from the extracted function, inputs and outputs to the region, and
63 /// mapping from the extracted function arguments to overall function arguments.
64 struct OutlinableRegion {
65   /// Describes the region of code.
66   IRSimilarityCandidate *Candidate = nullptr;
67 
68   /// If this region is outlined, the front and back IRInstructionData could
69   /// potentially become invalidated if the only new instruction is a call.
70   /// This ensures that we replace in the instruction in the IRInstructionData.
71   IRInstructionData *NewFront = nullptr;
72   IRInstructionData *NewBack = nullptr;
73 
74   /// The number of extracted inputs from the CodeExtractor.
75   unsigned NumExtractedInputs = 0;
76 
77   /// The corresponding BasicBlock with the appropriate stores for this
78   /// OutlinableRegion in the overall function.
79   unsigned OutputBlockNum = -1;
80 
81   /// Mapping the extracted argument number to the argument number in the
82   /// overall function.  Since there will be inputs, such as elevated constants
83   /// that are not the same in each region in a SimilarityGroup, or values that
84   /// cannot be sunk into the extracted section in every region, we must keep
85   /// track of which extracted argument maps to which overall argument.
86   DenseMap<unsigned, unsigned> ExtractedArgToAgg;
87   DenseMap<unsigned, unsigned> AggArgToExtracted;
88 
89   /// Mapping of the argument number in the deduplicated function
90   /// to a given constant, which is used when creating the arguments to the call
91   /// to the newly created deduplicated function.  This is handled separately
92   /// since the CodeExtractor does not recognize constants.
93   DenseMap<unsigned, Constant *> AggArgToConstant;
94 
95   /// The global value numbers that are used as outputs for this section. Once
96   /// extracted, each output will be stored to an output register.  This
97   /// documents the global value numbers that are used in this pattern.
98   SmallVector<unsigned, 4> GVNStores;
99 
100   /// Used to create an outlined function.
101   CodeExtractor *CE = nullptr;
102 
103   /// The call site of the extracted region.
104   CallInst *Call = nullptr;
105 
106   /// The function for the extracted region.
107   Function *ExtractedFunction = nullptr;
108 
109   /// Flag for whether we have split out the IRSimilarityCanidate. That is,
110   /// make the region contained the IRSimilarityCandidate its own BasicBlock.
111   bool CandidateSplit = false;
112 
113   /// Flag for whether we should not consider this region for extraction.
114   bool IgnoreRegion = false;
115 
116   /// The BasicBlock that is before the start of the region BasicBlock,
117   /// only defined when the region has been split.
118   BasicBlock *PrevBB = nullptr;
119 
120   /// The BasicBlock that contains the starting instruction of the region.
121   BasicBlock *StartBB = nullptr;
122 
123   /// The BasicBlock that contains the ending instruction of the region.
124   BasicBlock *EndBB = nullptr;
125 
126   /// The BasicBlock that is after the start of the region BasicBlock,
127   /// only defined when the region has been split.
128   BasicBlock *FollowBB = nullptr;
129 
130   /// The Outlinable Group that contains this region and structurally similar
131   /// regions to this region.
132   OutlinableGroup *Parent = nullptr;
133 
134   OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group)
135       : Candidate(&C), Parent(&Group) {
136     StartBB = C.getStartBB();
137     EndBB = C.getEndBB();
138   }
139 
140   /// For the contained region, split the parent BasicBlock at the starting and
141   /// ending instructions of the contained IRSimilarityCandidate.
142   void splitCandidate();
143 
144   /// For the contained region, reattach the BasicBlock at the starting and
145   /// ending instructions of the contained IRSimilarityCandidate, or if the
146   /// function has been extracted, the start and end of the BasicBlock
147   /// containing the called function.
148   void reattachCandidate();
149 
150   /// Get the size of the code removed from the region.
151   ///
152   /// \param [in] TTI - The TargetTransformInfo for the parent function.
153   /// \returns the code size of the region
154   InstructionCost getBenefit(TargetTransformInfo &TTI);
155 };
156 
157 /// This class is a pass that identifies similarity in a Module, extracts
158 /// instances of the similarity, and then consolidating the similar regions
159 /// in an effort to reduce code size.  It uses the IRSimilarityIdentifier pass
160 /// to identify the similar regions of code, and then extracts the similar
161 /// sections into a single function.  See the above for an example as to
162 /// how code is extracted and consolidated into a single function.
163 class IROutliner {
164 public:
165   IROutliner(function_ref<TargetTransformInfo &(Function &)> GTTI,
166              function_ref<IRSimilarityIdentifier &(Module &)> GIRSI,
167              function_ref<OptimizationRemarkEmitter &(Function &)> GORE)
168       : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) {}
169   bool run(Module &M);
170 
171 private:
172   /// Find repeated similar code sequences in \p M and outline them into new
173   /// Functions.
174   ///
175   /// \param [in] M - The module to outline from.
176   /// \returns The number of Functions created.
177   unsigned doOutline(Module &M);
178 
179   /// Remove all the IRSimilarityCandidates from \p CandidateVec that have
180   /// instructions contained in a previously outlined region and put the
181   /// remaining regions in \p CurrentGroup.
182   ///
183   /// \param [in] CandidateVec - List of similarity candidates for regions with
184   /// the same similarity structure.
185   /// \param [in,out] CurrentGroup - Contains the potential sections to
186   /// be outlined.
187   void
188   pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec,
189                            OutlinableGroup &CurrentGroup);
190 
191   /// Create the function based on the overall types found in the current
192   /// regions being outlined.
193   ///
194   /// \param M - The module to outline from.
195   /// \param [in,out] CG - The OutlinableGroup for the regions to be outlined.
196   /// \param [in] FunctionNameSuffix - How many functions have we previously
197   /// created.
198   /// \returns the newly created function.
199   Function *createFunction(Module &M, OutlinableGroup &CG,
200                            unsigned FunctionNameSuffix);
201 
202   /// Identify the needed extracted inputs in a section, and add to the overall
203   /// function if needed.
204   ///
205   /// \param [in] M - The module to outline from.
206   /// \param [in,out] Region - The region to be extracted.
207   /// \param [in] NotSame - The global value numbers of the Values in the region
208   /// that do not have the same Constant in each strucutrally similar region.
209   void findAddInputsOutputs(Module &M, OutlinableRegion &Region,
210                             DenseSet<unsigned> &NotSame);
211 
212   /// Find the number of instructions that will be removed by extracting the
213   /// OutlinableRegions in \p CurrentGroup.
214   ///
215   /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
216   /// analyzed.
217   /// \returns the number of outlined instructions across all regions.
218   InstructionCost findBenefitFromAllRegions(OutlinableGroup &CurrentGroup);
219 
220   /// Find the number of instructions that will be added by reloading arguments.
221   ///
222   /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
223   /// analyzed.
224   /// \returns the number of added reload instructions across all regions.
225   InstructionCost findCostOutputReloads(OutlinableGroup &CurrentGroup);
226 
227   /// Find the cost and the benefit of \p CurrentGroup and save it back to
228   /// \p CurrentGroup.
229   ///
230   /// \param [in] M - The module being analyzed
231   /// \param [in,out] CurrentGroup - The overall outlined section
232   void findCostBenefit(Module &M, OutlinableGroup &CurrentGroup);
233 
234   /// Update the output mapping based on the load instruction, and the outputs
235   /// of the extracted function.
236   ///
237   /// \param Region - The region extracted
238   /// \param Outputs - The outputs from the extracted function.
239   /// \param LI - The load instruction used to update the mapping.
240   void updateOutputMapping(OutlinableRegion &Region,
241                            ArrayRef<Value *> Outputs, LoadInst *LI);
242 
243   /// Extract \p Region into its own function.
244   ///
245   /// \param [in] Region - The region to be extracted into its own function.
246   /// \returns True if it was successfully outlined.
247   bool extractSection(OutlinableRegion &Region);
248 
249   /// For the similarities found, and the extracted sections, create a single
250   /// outlined function with appropriate output blocks as necessary.
251   ///
252   /// \param [in] M - The module to outline from
253   /// \param [in] CurrentGroup - The set of extracted sections to consolidate.
254   /// \param [in,out] FuncsToRemove - List of functions to remove from the
255   /// module after outlining is completed.
256   /// \param [in,out] OutlinedFunctionNum - the number of new outlined
257   /// functions.
258   void deduplicateExtractedSections(Module &M, OutlinableGroup &CurrentGroup,
259                                     std::vector<Function *> &FuncsToRemove,
260                                     unsigned &OutlinedFunctionNum);
261 
262   /// If true, enables us to outline from functions that have LinkOnceFromODR
263   /// linkages.
264   bool OutlineFromLinkODRs = false;
265 
266   /// If false, we do not worry if the cost is greater than the benefit.  This
267   /// is for debugging and testing, so that we can test small cases to ensure
268   /// that the outlining is being done correctly.
269   bool CostModel = true;
270 
271   /// The set of outlined Instructions, identified by their location in the
272   /// sequential ordering of instructions in a Module.
273   DenseSet<unsigned> Outlined;
274 
275   /// TargetTransformInfo lambda for target specific information.
276   function_ref<TargetTransformInfo &(Function &)> getTTI;
277 
278   /// A mapping from newly created reloaded output values to the original value.
279   /// If an value is replace by an output from an outlined region, this maps
280   /// that Value, back to its original Value.
281   DenseMap<Value *, Value *> OutputMappings;
282 
283   /// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier.
284   function_ref<IRSimilarityIdentifier &(Module &)> getIRSI;
285 
286   /// The optimization remark emitter for the pass.
287   function_ref<OptimizationRemarkEmitter &(Function &)> getORE;
288 
289   /// The memory allocator used to allocate the CodeExtractors.
290   SpecificBumpPtrAllocator<CodeExtractor> ExtractorAllocator;
291 
292   /// The memory allocator used to allocate the OutlinableRegions.
293   SpecificBumpPtrAllocator<OutlinableRegion> RegionAllocator;
294 
295   /// The memory allocator used to allocate new IRInstructionData.
296   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
297 
298   /// Custom InstVisitor to classify different instructions for whether it can
299   /// be analyzed for similarity.  This is needed as there may be instruction we
300   /// can identify as having similarity, but are more complicated to outline.
301   struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> {
302     InstructionAllowed() {}
303 
304     // TODO: Determine a scheme to resolve when the label is similar enough.
305     bool visitBranchInst(BranchInst &BI) { return false; }
306     // TODO: Determine a scheme to resolve when the labels are similar enough.
307     bool visitPHINode(PHINode &PN) { return false; }
308     // TODO: Handle allocas.
309     bool visitAllocaInst(AllocaInst &AI) { return false; }
310     // VAArg instructions are not allowed since this could cause difficulty when
311     // differentiating between different sets of variable instructions in
312     // the deduplicated outlined regions.
313     bool visitVAArgInst(VAArgInst &VI) { return false; }
314     // We exclude all exception handling cases since they are so context
315     // dependent.
316     bool visitLandingPadInst(LandingPadInst &LPI) { return false; }
317     bool visitFuncletPadInst(FuncletPadInst &FPI) { return false; }
318     // DebugInfo should be included in the regions, but should not be
319     // analyzed for similarity as it has no bearing on the outcome of the
320     // program.
321     bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
322     // TODO: Handle specific intrinsics individually from those that can be
323     // handled.
324     bool IntrinsicInst(IntrinsicInst &II) { return false; }
325     // We only handle CallInsts that are not indirect, since we cannot guarantee
326     // that they have a name in these cases.
327     bool visitCallInst(CallInst &CI) {
328       Function *F = CI.getCalledFunction();
329       if (!F || CI.isIndirectCall() || !F->hasName())
330         return false;
331       return true;
332     }
333     // TODO: Handle FreezeInsts.  Since a frozen value could be frozen inside
334     // the outlined region, and then returned as an output, this will have to be
335     // handled differently.
336     bool visitFreezeInst(FreezeInst &CI) { return false; }
337     // TODO: We do not current handle similarity that changes the control flow.
338     bool visitInvokeInst(InvokeInst &II) { return false; }
339     // TODO: We do not current handle similarity that changes the control flow.
340     bool visitCallBrInst(CallBrInst &CBI) { return false; }
341     // TODO: Handle interblock similarity.
342     bool visitTerminator(Instruction &I) { return false; }
343     bool visitInstruction(Instruction &I) { return true; }
344   };
345 
346   /// A InstVisitor used to exclude certain instructions from being outlined.
347   InstructionAllowed InstructionClassifier;
348 };
349 
350 /// Pass to outline similar regions.
351 class IROutlinerPass : public PassInfoMixin<IROutlinerPass> {
352 public:
353   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
354 };
355 
356 } // end namespace llvm
357 
358 #endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H
359