1 //===- IROutliner.h - Extract similar IR regions into functions ------------==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // \file 10 // The interface file for the IROutliner which is used by the IROutliner Pass. 11 // 12 // The outliner uses the IRSimilarityIdentifier to identify the similar regions 13 // of code. It evaluates each set of IRSimilarityCandidates with an estimate of 14 // whether it will provide code size reduction. Each region is extracted using 15 // the code extractor. These extracted functions are consolidated into a single 16 // function and called from the extracted call site. 17 // 18 // For example: 19 // \code 20 // %1 = add i32 %a, %b 21 // %2 = add i32 %b, %a 22 // %3 = add i32 %b, %a 23 // %4 = add i32 %a, %b 24 // \endcode 25 // would become function 26 // \code 27 // define internal void outlined_ir_function(i32 %0, i32 %1) { 28 // %1 = add i32 %0, %1 29 // %2 = add i32 %1, %0 30 // ret void 31 // } 32 // \endcode 33 // with calls: 34 // \code 35 // call void outlined_ir_function(i32 %a, i32 %b) 36 // call void outlined_ir_function(i32 %b, i32 %a) 37 // \endcode 38 // 39 //===----------------------------------------------------------------------===// 40 41 #ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H 42 #define LLVM_TRANSFORMS_IPO_IROUTLINER_H 43 44 #include "llvm/Analysis/IRSimilarityIdentifier.h" 45 #include "llvm/IR/PassManager.h" 46 #include "llvm/IR/ValueMap.h" 47 #include "llvm/Support/InstructionCost.h" 48 #include "llvm/Transforms/Utils/CodeExtractor.h" 49 #include <set> 50 51 struct OutlinableGroup; 52 53 namespace llvm { 54 using namespace IRSimilarity; 55 56 class Module; 57 class TargetTransformInfo; 58 class OptimizationRemarkEmitter; 59 60 /// The OutlinableRegion holds all the information for a specific region, or 61 /// sequence of instructions. This includes what values need to be hoisted to 62 /// arguments from the extracted function, inputs and outputs to the region, and 63 /// mapping from the extracted function arguments to overall function arguments. 64 struct OutlinableRegion { 65 /// Describes the region of code. 66 IRSimilarityCandidate *Candidate = nullptr; 67 68 /// If this region is outlined, the front and back IRInstructionData could 69 /// potentially become invalidated if the only new instruction is a call. 70 /// This ensures that we replace in the instruction in the IRInstructionData. 71 IRInstructionData *NewFront = nullptr; 72 IRInstructionData *NewBack = nullptr; 73 74 /// The number of extracted inputs from the CodeExtractor. 75 unsigned NumExtractedInputs = 0; 76 77 /// The corresponding BasicBlock with the appropriate stores for this 78 /// OutlinableRegion in the overall function. 79 unsigned OutputBlockNum = -1; 80 81 /// Mapping the extracted argument number to the argument number in the 82 /// overall function. Since there will be inputs, such as elevated constants 83 /// that are not the same in each region in a SimilarityGroup, or values that 84 /// cannot be sunk into the extracted section in every region, we must keep 85 /// track of which extracted argument maps to which overall argument. 86 DenseMap<unsigned, unsigned> ExtractedArgToAgg; 87 DenseMap<unsigned, unsigned> AggArgToExtracted; 88 89 /// Marks whether we need to change the order of the arguments when mapping 90 /// the old extracted function call to the new aggregate outlined function 91 /// call. 92 bool ChangedArgOrder = false; 93 94 /// Marks whether this region ends in a branch, there is special handling 95 /// required for the following basic blocks in this case. 96 bool EndsInBranch = false; 97 98 /// The PHIBlocks with their corresponding return block based on the return 99 /// value as the key. 100 DenseMap<Value *, BasicBlock *> PHIBlocks; 101 102 /// Mapping of the argument number in the deduplicated function 103 /// to a given constant, which is used when creating the arguments to the call 104 /// to the newly created deduplicated function. This is handled separately 105 /// since the CodeExtractor does not recognize constants. 106 DenseMap<unsigned, Constant *> AggArgToConstant; 107 108 /// The global value numbers that are used as outputs for this section. Once 109 /// extracted, each output will be stored to an output register. This 110 /// documents the global value numbers that are used in this pattern. 111 SmallVector<unsigned, 4> GVNStores; 112 113 /// Used to create an outlined function. 114 CodeExtractor *CE = nullptr; 115 116 /// The call site of the extracted region. 117 CallInst *Call = nullptr; 118 119 /// The function for the extracted region. 120 Function *ExtractedFunction = nullptr; 121 122 /// Flag for whether we have split out the IRSimilarityCanidate. That is, 123 /// make the region contained the IRSimilarityCandidate its own BasicBlock. 124 bool CandidateSplit = false; 125 126 /// Flag for whether we should not consider this region for extraction. 127 bool IgnoreRegion = false; 128 129 /// The BasicBlock that is before the start of the region BasicBlock, 130 /// only defined when the region has been split. 131 BasicBlock *PrevBB = nullptr; 132 133 /// The BasicBlock that contains the starting instruction of the region. 134 BasicBlock *StartBB = nullptr; 135 136 /// The BasicBlock that contains the ending instruction of the region. 137 BasicBlock *EndBB = nullptr; 138 139 /// The BasicBlock that is after the start of the region BasicBlock, 140 /// only defined when the region has been split. 141 BasicBlock *FollowBB = nullptr; 142 143 /// The Outlinable Group that contains this region and structurally similar 144 /// regions to this region. 145 OutlinableGroup *Parent = nullptr; 146 147 OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group) 148 : Candidate(&C), Parent(&Group) { 149 StartBB = C.getStartBB(); 150 EndBB = C.getEndBB(); 151 } 152 153 /// For the contained region, split the parent BasicBlock at the starting and 154 /// ending instructions of the contained IRSimilarityCandidate. 155 void splitCandidate(); 156 157 /// For the contained region, reattach the BasicBlock at the starting and 158 /// ending instructions of the contained IRSimilarityCandidate, or if the 159 /// function has been extracted, the start and end of the BasicBlock 160 /// containing the called function. 161 void reattachCandidate(); 162 163 /// Find a corresponding value for \p V in similar OutlinableRegion \p Other. 164 /// 165 /// \param Other [in] - The OutlinableRegion to find the corresponding Value 166 /// in. 167 /// \param V [in] - The Value to look for in the other region. 168 /// \return The corresponding Value to \p V if it exists, otherwise nullptr. 169 Value *findCorrespondingValueIn(const OutlinableRegion &Other, Value *V); 170 171 /// Get the size of the code removed from the region. 172 /// 173 /// \param [in] TTI - The TargetTransformInfo for the parent function. 174 /// \returns the code size of the region 175 InstructionCost getBenefit(TargetTransformInfo &TTI); 176 }; 177 178 /// This class is a pass that identifies similarity in a Module, extracts 179 /// instances of the similarity, and then consolidating the similar regions 180 /// in an effort to reduce code size. It uses the IRSimilarityIdentifier pass 181 /// to identify the similar regions of code, and then extracts the similar 182 /// sections into a single function. See the above for an example as to 183 /// how code is extracted and consolidated into a single function. 184 class IROutliner { 185 public: 186 IROutliner(function_ref<TargetTransformInfo &(Function &)> GTTI, 187 function_ref<IRSimilarityIdentifier &(Module &)> GIRSI, 188 function_ref<OptimizationRemarkEmitter &(Function &)> GORE) 189 : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) { 190 191 // Check that the DenseMap implementation has not changed. 192 assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 && 193 "DenseMapInfo<unsigned>'s empty key isn't -1!"); 194 assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 && 195 "DenseMapInfo<unsigned>'s tombstone key isn't -2!"); 196 } 197 bool run(Module &M); 198 199 private: 200 /// Find repeated similar code sequences in \p M and outline them into new 201 /// Functions. 202 /// 203 /// \param [in] M - The module to outline from. 204 /// \returns The number of Functions created. 205 unsigned doOutline(Module &M); 206 207 /// Check whether an OutlinableRegion is incompatible with code already 208 /// outlined. OutlinableRegions are incomptaible when there are overlapping 209 /// instructions, or code that has not been recorded has been added to the 210 /// instructions. 211 /// 212 /// \param [in] Region - The OutlinableRegion to check for conflicts with 213 /// already outlined code. 214 /// \returns whether the region can safely be outlined. 215 bool isCompatibleWithAlreadyOutlinedCode(const OutlinableRegion &Region); 216 217 /// Remove all the IRSimilarityCandidates from \p CandidateVec that have 218 /// instructions contained in a previously outlined region and put the 219 /// remaining regions in \p CurrentGroup. 220 /// 221 /// \param [in] CandidateVec - List of similarity candidates for regions with 222 /// the same similarity structure. 223 /// \param [in,out] CurrentGroup - Contains the potential sections to 224 /// be outlined. 225 void 226 pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec, 227 OutlinableGroup &CurrentGroup); 228 229 /// Create the function based on the overall types found in the current 230 /// regions being outlined. 231 /// 232 /// \param M - The module to outline from. 233 /// \param [in,out] CG - The OutlinableGroup for the regions to be outlined. 234 /// \param [in] FunctionNameSuffix - How many functions have we previously 235 /// created. 236 /// \returns the newly created function. 237 Function *createFunction(Module &M, OutlinableGroup &CG, 238 unsigned FunctionNameSuffix); 239 240 /// Identify the needed extracted inputs in a section, and add to the overall 241 /// function if needed. 242 /// 243 /// \param [in] M - The module to outline from. 244 /// \param [in,out] Region - The region to be extracted. 245 /// \param [in] NotSame - The global value numbers of the Values in the region 246 /// that do not have the same Constant in each strucutrally similar region. 247 void findAddInputsOutputs(Module &M, OutlinableRegion &Region, 248 DenseSet<unsigned> &NotSame); 249 250 /// Find the number of instructions that will be removed by extracting the 251 /// OutlinableRegions in \p CurrentGroup. 252 /// 253 /// \param [in] CurrentGroup - The collection of OutlinableRegions to be 254 /// analyzed. 255 /// \returns the number of outlined instructions across all regions. 256 InstructionCost findBenefitFromAllRegions(OutlinableGroup &CurrentGroup); 257 258 /// Find the number of instructions that will be added by reloading arguments. 259 /// 260 /// \param [in] CurrentGroup - The collection of OutlinableRegions to be 261 /// analyzed. 262 /// \returns the number of added reload instructions across all regions. 263 InstructionCost findCostOutputReloads(OutlinableGroup &CurrentGroup); 264 265 /// Find the cost and the benefit of \p CurrentGroup and save it back to 266 /// \p CurrentGroup. 267 /// 268 /// \param [in] M - The module being analyzed 269 /// \param [in,out] CurrentGroup - The overall outlined section 270 void findCostBenefit(Module &M, OutlinableGroup &CurrentGroup); 271 272 /// Update the output mapping based on the load instruction, and the outputs 273 /// of the extracted function. 274 /// 275 /// \param Region - The region extracted 276 /// \param Outputs - The outputs from the extracted function. 277 /// \param LI - The load instruction used to update the mapping. 278 void updateOutputMapping(OutlinableRegion &Region, 279 ArrayRef<Value *> Outputs, LoadInst *LI); 280 281 /// Extract \p Region into its own function. 282 /// 283 /// \param [in] Region - The region to be extracted into its own function. 284 /// \returns True if it was successfully outlined. 285 bool extractSection(OutlinableRegion &Region); 286 287 /// For the similarities found, and the extracted sections, create a single 288 /// outlined function with appropriate output blocks as necessary. 289 /// 290 /// \param [in] M - The module to outline from 291 /// \param [in] CurrentGroup - The set of extracted sections to consolidate. 292 /// \param [in,out] FuncsToRemove - List of functions to remove from the 293 /// module after outlining is completed. 294 /// \param [in,out] OutlinedFunctionNum - the number of new outlined 295 /// functions. 296 void deduplicateExtractedSections(Module &M, OutlinableGroup &CurrentGroup, 297 std::vector<Function *> &FuncsToRemove, 298 unsigned &OutlinedFunctionNum); 299 300 /// If true, enables us to outline from functions that have LinkOnceFromODR 301 /// linkages. 302 bool OutlineFromLinkODRs = false; 303 304 /// If false, we do not worry if the cost is greater than the benefit. This 305 /// is for debugging and testing, so that we can test small cases to ensure 306 /// that the outlining is being done correctly. 307 bool CostModel = true; 308 309 /// The set of outlined Instructions, identified by their location in the 310 /// sequential ordering of instructions in a Module. 311 DenseSet<unsigned> Outlined; 312 313 /// TargetTransformInfo lambda for target specific information. 314 function_ref<TargetTransformInfo &(Function &)> getTTI; 315 316 /// A mapping from newly created reloaded output values to the original value. 317 /// If an value is replace by an output from an outlined region, this maps 318 /// that Value, back to its original Value. 319 DenseMap<Value *, Value *> OutputMappings; 320 321 /// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier. 322 function_ref<IRSimilarityIdentifier &(Module &)> getIRSI; 323 324 /// The optimization remark emitter for the pass. 325 function_ref<OptimizationRemarkEmitter &(Function &)> getORE; 326 327 /// The memory allocator used to allocate the CodeExtractors. 328 SpecificBumpPtrAllocator<CodeExtractor> ExtractorAllocator; 329 330 /// The memory allocator used to allocate the OutlinableRegions. 331 SpecificBumpPtrAllocator<OutlinableRegion> RegionAllocator; 332 333 /// The memory allocator used to allocate new IRInstructionData. 334 SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator; 335 336 /// Custom InstVisitor to classify different instructions for whether it can 337 /// be analyzed for similarity. This is needed as there may be instruction we 338 /// can identify as having similarity, but are more complicated to outline. 339 struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> { 340 InstructionAllowed() = default; 341 342 bool visitBranchInst(BranchInst &BI) { return EnableBranches; } 343 bool visitPHINode(PHINode &PN) { return EnableBranches; } 344 // TODO: Handle allocas. 345 bool visitAllocaInst(AllocaInst &AI) { return false; } 346 // VAArg instructions are not allowed since this could cause difficulty when 347 // differentiating between different sets of variable instructions in 348 // the deduplicated outlined regions. 349 bool visitVAArgInst(VAArgInst &VI) { return false; } 350 // We exclude all exception handling cases since they are so context 351 // dependent. 352 bool visitLandingPadInst(LandingPadInst &LPI) { return false; } 353 bool visitFuncletPadInst(FuncletPadInst &FPI) { return false; } 354 // DebugInfo should be included in the regions, but should not be 355 // analyzed for similarity as it has no bearing on the outcome of the 356 // program. 357 bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; } 358 // TODO: Handle specific intrinsics individually from those that can be 359 // handled. 360 bool IntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; } 361 // We only handle CallInsts that are not indirect, since we cannot guarantee 362 // that they have a name in these cases. 363 bool visitCallInst(CallInst &CI) { 364 Function *F = CI.getCalledFunction(); 365 bool IsIndirectCall = CI.isIndirectCall(); 366 if (IsIndirectCall && !EnableIndirectCalls) 367 return false; 368 if (!F && !IsIndirectCall) 369 return false; 370 // Returning twice can cause issues with the state of the function call 371 // that were not expected when the function was used, so we do not include 372 // the call in outlined functions. 373 if (CI.canReturnTwice()) 374 return false; 375 return true; 376 } 377 // TODO: Handle FreezeInsts. Since a frozen value could be frozen inside 378 // the outlined region, and then returned as an output, this will have to be 379 // handled differently. 380 bool visitFreezeInst(FreezeInst &CI) { return false; } 381 // TODO: We do not current handle similarity that changes the control flow. 382 bool visitInvokeInst(InvokeInst &II) { return false; } 383 // TODO: We do not current handle similarity that changes the control flow. 384 bool visitCallBrInst(CallBrInst &CBI) { return false; } 385 // TODO: Handle interblock similarity. 386 bool visitTerminator(Instruction &I) { return false; } 387 bool visitInstruction(Instruction &I) { return true; } 388 389 // The flag variable that marks whether we should allow branch instructions 390 // to be outlined. 391 bool EnableBranches = false; 392 393 // The flag variable that marks whether we should allow indirect calls 394 // to be outlined. 395 bool EnableIndirectCalls = true; 396 397 // The flag variable that marks whether we should allow intrinsics 398 // instructions to be outlined. 399 bool EnableIntrinsics = false; 400 }; 401 402 /// A InstVisitor used to exclude certain instructions from being outlined. 403 InstructionAllowed InstructionClassifier; 404 }; 405 406 /// Pass to outline similar regions. 407 class IROutlinerPass : public PassInfoMixin<IROutlinerPass> { 408 public: 409 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 410 }; 411 412 } // end namespace llvm 413 414 #endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H 415