1 //===- IROutliner.h - Extract similar IR regions into functions ------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // The interface file for the IROutliner which is used by the IROutliner Pass.
11 //
12 // The outliner uses the IRSimilarityIdentifier to identify the similar regions
13 // of code.  It evaluates each set of IRSimilarityCandidates with an estimate of
14 // whether it will provide code size reduction.  Each region is extracted using
15 // the code extractor.  These extracted functions are consolidated into a single
16 // function and called from the extracted call site.
17 //
18 // For example:
19 // \code
20 //   %1 = add i32 %a, %b
21 //   %2 = add i32 %b, %a
22 //   %3 = add i32 %b, %a
23 //   %4 = add i32 %a, %b
24 // \endcode
25 // would become function
26 // \code
27 // define internal void outlined_ir_function(i32 %0, i32 %1) {
28 //   %1 = add i32 %0, %1
29 //   %2 = add i32 %1, %0
30 //   ret void
31 // }
32 // \endcode
33 // with calls:
34 // \code
35 //   call void outlined_ir_function(i32 %a, i32 %b)
36 //   call void outlined_ir_function(i32 %b, i32 %a)
37 // \endcode
38 //
39 //===----------------------------------------------------------------------===//
40 
41 #ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H
42 #define LLVM_TRANSFORMS_IPO_IROUTLINER_H
43 
44 #include "llvm/Analysis/IRSimilarityIdentifier.h"
45 #include "llvm/IR/PassManager.h"
46 #include "llvm/IR/ValueMap.h"
47 #include "llvm/Support/InstructionCost.h"
48 #include "llvm/Transforms/Utils/CodeExtractor.h"
49 #include <set>
50 
51 struct OutlinableGroup;
52 
53 namespace llvm {
54 using namespace IRSimilarity;
55 
56 class Module;
57 class TargetTransformInfo;
58 class OptimizationRemarkEmitter;
59 
60 /// The OutlinableRegion holds all the information for a specific region, or
61 /// sequence of instructions. This includes what values need to be hoisted to
62 /// arguments from the extracted function, inputs and outputs to the region, and
63 /// mapping from the extracted function arguments to overall function arguments.
64 struct OutlinableRegion {
65   /// Describes the region of code.
66   IRSimilarityCandidate *Candidate = nullptr;
67 
68   /// If this region is outlined, the front and back IRInstructionData could
69   /// potentially become invalidated if the only new instruction is a call.
70   /// This ensures that we replace in the instruction in the IRInstructionData.
71   IRInstructionData *NewFront = nullptr;
72   IRInstructionData *NewBack = nullptr;
73 
74   /// The number of extracted inputs from the CodeExtractor.
75   unsigned NumExtractedInputs = 0;
76 
77   /// The corresponding BasicBlock with the appropriate stores for this
78   /// OutlinableRegion in the overall function.
79   unsigned OutputBlockNum = -1;
80 
81   /// Mapping the extracted argument number to the argument number in the
82   /// overall function.  Since there will be inputs, such as elevated constants
83   /// that are not the same in each region in a SimilarityGroup, or values that
84   /// cannot be sunk into the extracted section in every region, we must keep
85   /// track of which extracted argument maps to which overall argument.
86   DenseMap<unsigned, unsigned> ExtractedArgToAgg;
87   DenseMap<unsigned, unsigned> AggArgToExtracted;
88 
89   /// Marks whether we need to change the order of the arguments when mapping
90   /// the old extracted function call to the new aggregate outlined function
91   /// call.
92   bool ChangedArgOrder = false;
93 
94   /// Marks whether this region ends in a branch, there is special handling
95   /// required for the following basic blocks in this case.
96   bool EndsInBranch = false;
97 
98   /// The PHIBlocks with their corresponding return block based on the return
99   /// value as the key.
100   DenseMap<Value *, BasicBlock *> PHIBlocks;
101 
102   /// Mapping of the argument number in the deduplicated function
103   /// to a given constant, which is used when creating the arguments to the call
104   /// to the newly created deduplicated function.  This is handled separately
105   /// since the CodeExtractor does not recognize constants.
106   DenseMap<unsigned, Constant *> AggArgToConstant;
107 
108   /// The global value numbers that are used as outputs for this section. Once
109   /// extracted, each output will be stored to an output register.  This
110   /// documents the global value numbers that are used in this pattern.
111   SmallVector<unsigned, 4> GVNStores;
112 
113   /// Used to create an outlined function.
114   CodeExtractor *CE = nullptr;
115 
116   /// The call site of the extracted region.
117   CallInst *Call = nullptr;
118 
119   /// The function for the extracted region.
120   Function *ExtractedFunction = nullptr;
121 
122   /// Flag for whether we have split out the IRSimilarityCanidate. That is,
123   /// make the region contained the IRSimilarityCandidate its own BasicBlock.
124   bool CandidateSplit = false;
125 
126   /// Flag for whether we should not consider this region for extraction.
127   bool IgnoreRegion = false;
128 
129   /// The BasicBlock that is before the start of the region BasicBlock,
130   /// only defined when the region has been split.
131   BasicBlock *PrevBB = nullptr;
132 
133   /// The BasicBlock that contains the starting instruction of the region.
134   BasicBlock *StartBB = nullptr;
135 
136   /// The BasicBlock that contains the ending instruction of the region.
137   BasicBlock *EndBB = nullptr;
138 
139   /// The BasicBlock that is after the start of the region BasicBlock,
140   /// only defined when the region has been split.
141   BasicBlock *FollowBB = nullptr;
142 
143   /// The Outlinable Group that contains this region and structurally similar
144   /// regions to this region.
145   OutlinableGroup *Parent = nullptr;
146 
147   OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group)
148       : Candidate(&C), Parent(&Group) {
149     StartBB = C.getStartBB();
150     EndBB = C.getEndBB();
151   }
152 
153   /// For the contained region, split the parent BasicBlock at the starting and
154   /// ending instructions of the contained IRSimilarityCandidate.
155   void splitCandidate();
156 
157   /// For the contained region, reattach the BasicBlock at the starting and
158   /// ending instructions of the contained IRSimilarityCandidate, or if the
159   /// function has been extracted, the start and end of the BasicBlock
160   /// containing the called function.
161   void reattachCandidate();
162 
163   /// Find a corresponding value for \p V in similar OutlinableRegion \p Other.
164   ///
165   /// \param Other [in] - The OutlinableRegion to find the corresponding Value
166   /// in.
167   /// \param V [in] - The Value to look for in the other region.
168   /// \return The corresponding Value to \p V if it exists, otherwise nullptr.
169   Value *findCorrespondingValueIn(const OutlinableRegion &Other, Value *V);
170 
171   /// Get the size of the code removed from the region.
172   ///
173   /// \param [in] TTI - The TargetTransformInfo for the parent function.
174   /// \returns the code size of the region
175   InstructionCost getBenefit(TargetTransformInfo &TTI);
176 };
177 
178 /// This class is a pass that identifies similarity in a Module, extracts
179 /// instances of the similarity, and then consolidating the similar regions
180 /// in an effort to reduce code size.  It uses the IRSimilarityIdentifier pass
181 /// to identify the similar regions of code, and then extracts the similar
182 /// sections into a single function.  See the above for an example as to
183 /// how code is extracted and consolidated into a single function.
184 class IROutliner {
185 public:
186   IROutliner(function_ref<TargetTransformInfo &(Function &)> GTTI,
187              function_ref<IRSimilarityIdentifier &(Module &)> GIRSI,
188              function_ref<OptimizationRemarkEmitter &(Function &)> GORE)
189       : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) {
190 
191     // Check that the DenseMap implementation has not changed.
192     assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
193            "DenseMapInfo<unsigned>'s empty key isn't -1!");
194     assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 &&
195            "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
196   }
197   bool run(Module &M);
198 
199 private:
200   /// Find repeated similar code sequences in \p M and outline them into new
201   /// Functions.
202   ///
203   /// \param [in] M - The module to outline from.
204   /// \returns The number of Functions created.
205   unsigned doOutline(Module &M);
206 
207   /// Check whether an OutlinableRegion is incompatible with code already
208   /// outlined. OutlinableRegions are incomptaible when there are overlapping
209   /// instructions, or code that has not been recorded has been added to the
210   /// instructions.
211   ///
212   /// \param [in] Region - The OutlinableRegion to check for conflicts with
213   /// already outlined code.
214   /// \returns whether the region can safely be outlined.
215   bool isCompatibleWithAlreadyOutlinedCode(const OutlinableRegion &Region);
216 
217   /// Remove all the IRSimilarityCandidates from \p CandidateVec that have
218   /// instructions contained in a previously outlined region and put the
219   /// remaining regions in \p CurrentGroup.
220   ///
221   /// \param [in] CandidateVec - List of similarity candidates for regions with
222   /// the same similarity structure.
223   /// \param [in,out] CurrentGroup - Contains the potential sections to
224   /// be outlined.
225   void
226   pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec,
227                            OutlinableGroup &CurrentGroup);
228 
229   /// Create the function based on the overall types found in the current
230   /// regions being outlined.
231   ///
232   /// \param M - The module to outline from.
233   /// \param [in,out] CG - The OutlinableGroup for the regions to be outlined.
234   /// \param [in] FunctionNameSuffix - How many functions have we previously
235   /// created.
236   /// \returns the newly created function.
237   Function *createFunction(Module &M, OutlinableGroup &CG,
238                            unsigned FunctionNameSuffix);
239 
240   /// Identify the needed extracted inputs in a section, and add to the overall
241   /// function if needed.
242   ///
243   /// \param [in] M - The module to outline from.
244   /// \param [in,out] Region - The region to be extracted.
245   /// \param [in] NotSame - The global value numbers of the Values in the region
246   /// that do not have the same Constant in each strucutrally similar region.
247   void findAddInputsOutputs(Module &M, OutlinableRegion &Region,
248                             DenseSet<unsigned> &NotSame);
249 
250   /// Find the number of instructions that will be removed by extracting the
251   /// OutlinableRegions in \p CurrentGroup.
252   ///
253   /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
254   /// analyzed.
255   /// \returns the number of outlined instructions across all regions.
256   InstructionCost findBenefitFromAllRegions(OutlinableGroup &CurrentGroup);
257 
258   /// Find the number of instructions that will be added by reloading arguments.
259   ///
260   /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
261   /// analyzed.
262   /// \returns the number of added reload instructions across all regions.
263   InstructionCost findCostOutputReloads(OutlinableGroup &CurrentGroup);
264 
265   /// Find the cost and the benefit of \p CurrentGroup and save it back to
266   /// \p CurrentGroup.
267   ///
268   /// \param [in] M - The module being analyzed
269   /// \param [in,out] CurrentGroup - The overall outlined section
270   void findCostBenefit(Module &M, OutlinableGroup &CurrentGroup);
271 
272   /// Update the output mapping based on the load instruction, and the outputs
273   /// of the extracted function.
274   ///
275   /// \param Region - The region extracted
276   /// \param Outputs - The outputs from the extracted function.
277   /// \param LI - The load instruction used to update the mapping.
278   void updateOutputMapping(OutlinableRegion &Region,
279                            ArrayRef<Value *> Outputs, LoadInst *LI);
280 
281   /// Extract \p Region into its own function.
282   ///
283   /// \param [in] Region - The region to be extracted into its own function.
284   /// \returns True if it was successfully outlined.
285   bool extractSection(OutlinableRegion &Region);
286 
287   /// For the similarities found, and the extracted sections, create a single
288   /// outlined function with appropriate output blocks as necessary.
289   ///
290   /// \param [in] M - The module to outline from
291   /// \param [in] CurrentGroup - The set of extracted sections to consolidate.
292   /// \param [in,out] FuncsToRemove - List of functions to remove from the
293   /// module after outlining is completed.
294   /// \param [in,out] OutlinedFunctionNum - the number of new outlined
295   /// functions.
296   void deduplicateExtractedSections(Module &M, OutlinableGroup &CurrentGroup,
297                                     std::vector<Function *> &FuncsToRemove,
298                                     unsigned &OutlinedFunctionNum);
299 
300   /// If true, enables us to outline from functions that have LinkOnceFromODR
301   /// linkages.
302   bool OutlineFromLinkODRs = false;
303 
304   /// If false, we do not worry if the cost is greater than the benefit.  This
305   /// is for debugging and testing, so that we can test small cases to ensure
306   /// that the outlining is being done correctly.
307   bool CostModel = true;
308 
309   /// The set of outlined Instructions, identified by their location in the
310   /// sequential ordering of instructions in a Module.
311   DenseSet<unsigned> Outlined;
312 
313   /// TargetTransformInfo lambda for target specific information.
314   function_ref<TargetTransformInfo &(Function &)> getTTI;
315 
316   /// A mapping from newly created reloaded output values to the original value.
317   /// If an value is replace by an output from an outlined region, this maps
318   /// that Value, back to its original Value.
319   DenseMap<Value *, Value *> OutputMappings;
320 
321   /// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier.
322   function_ref<IRSimilarityIdentifier &(Module &)> getIRSI;
323 
324   /// The optimization remark emitter for the pass.
325   function_ref<OptimizationRemarkEmitter &(Function &)> getORE;
326 
327   /// The memory allocator used to allocate the CodeExtractors.
328   SpecificBumpPtrAllocator<CodeExtractor> ExtractorAllocator;
329 
330   /// The memory allocator used to allocate the OutlinableRegions.
331   SpecificBumpPtrAllocator<OutlinableRegion> RegionAllocator;
332 
333   /// The memory allocator used to allocate new IRInstructionData.
334   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
335 
336   /// Custom InstVisitor to classify different instructions for whether it can
337   /// be analyzed for similarity.  This is needed as there may be instruction we
338   /// can identify as having similarity, but are more complicated to outline.
339   struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> {
340     InstructionAllowed() = default;
341 
342     bool visitBranchInst(BranchInst &BI) { return EnableBranches; }
343     bool visitPHINode(PHINode &PN) { return EnableBranches; }
344     // TODO: Handle allocas.
345     bool visitAllocaInst(AllocaInst &AI) { return false; }
346     // VAArg instructions are not allowed since this could cause difficulty when
347     // differentiating between different sets of variable instructions in
348     // the deduplicated outlined regions.
349     bool visitVAArgInst(VAArgInst &VI) { return false; }
350     // We exclude all exception handling cases since they are so context
351     // dependent.
352     bool visitLandingPadInst(LandingPadInst &LPI) { return false; }
353     bool visitFuncletPadInst(FuncletPadInst &FPI) { return false; }
354     // DebugInfo should be included in the regions, but should not be
355     // analyzed for similarity as it has no bearing on the outcome of the
356     // program.
357     bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
358     // TODO: Handle specific intrinsics individually from those that can be
359     // handled.
360     bool IntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; }
361     // We only handle CallInsts that are not indirect, since we cannot guarantee
362     // that they have a name in these cases.
363     bool visitCallInst(CallInst &CI) {
364       Function *F = CI.getCalledFunction();
365       bool IsIndirectCall = CI.isIndirectCall();
366       if (IsIndirectCall && !EnableIndirectCalls)
367         return false;
368       if (!F && !IsIndirectCall)
369         return false;
370       // Returning twice can cause issues with the state of the function call
371       // that were not expected when the function was used, so we do not include
372       // the call in outlined functions.
373       if (CI.canReturnTwice())
374         return false;
375       return true;
376     }
377     // TODO: Handle FreezeInsts.  Since a frozen value could be frozen inside
378     // the outlined region, and then returned as an output, this will have to be
379     // handled differently.
380     bool visitFreezeInst(FreezeInst &CI) { return false; }
381     // TODO: We do not current handle similarity that changes the control flow.
382     bool visitInvokeInst(InvokeInst &II) { return false; }
383     // TODO: We do not current handle similarity that changes the control flow.
384     bool visitCallBrInst(CallBrInst &CBI) { return false; }
385     // TODO: Handle interblock similarity.
386     bool visitTerminator(Instruction &I) { return false; }
387     bool visitInstruction(Instruction &I) { return true; }
388 
389     // The flag variable that marks whether we should allow branch instructions
390     // to be outlined.
391     bool EnableBranches = false;
392 
393     // The flag variable that marks whether we should allow indirect calls
394     // to be outlined.
395     bool EnableIndirectCalls = true;
396 
397     // The flag variable that marks whether we should allow intrinsics
398     // instructions to be outlined.
399     bool EnableIntrinsics = false;
400   };
401 
402   /// A InstVisitor used to exclude certain instructions from being outlined.
403   InstructionAllowed InstructionClassifier;
404 };
405 
406 /// Pass to outline similar regions.
407 class IROutlinerPass : public PassInfoMixin<IROutlinerPass> {
408 public:
409   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
410 };
411 
412 } // end namespace llvm
413 
414 #endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H
415