1 //===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file provides a LoopVectorizationPlanner class. 11 /// InnerLoopVectorizer vectorizes loops which contain only one basic 12 /// LoopVectorizationPlanner - drives the vectorization process after having 13 /// passed Legality checks. 14 /// The planner builds and optimizes the Vectorization Plans which record the 15 /// decisions how to vectorize the given loop. In particular, represent the 16 /// control-flow of the vectorized version, the replication of instructions that 17 /// are to be scalarized, and interleave access groups. 18 /// 19 /// Also provides a VPlan-based builder utility analogous to IRBuilder. 20 /// It provides an instruction-level API for generating VPInstructions while 21 /// abstracting away the Recipe manipulation details. 22 //===----------------------------------------------------------------------===// 23 24 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H 25 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H 26 27 #include "VPlan.h" 28 #include "llvm/ADT/SmallSet.h" 29 #include "llvm/Support/InstructionCost.h" 30 31 namespace llvm { 32 33 class LoopInfo; 34 class LoopVectorizationLegality; 35 class LoopVectorizationCostModel; 36 class PredicatedScalarEvolution; 37 class LoopVectorizeHints; 38 class OptimizationRemarkEmitter; 39 class TargetTransformInfo; 40 class TargetLibraryInfo; 41 class VPRecipeBuilder; 42 43 /// VPlan-based builder utility analogous to IRBuilder. 44 class VPBuilder { 45 VPBasicBlock *BB = nullptr; 46 VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator(); 47 48 VPInstruction *createInstruction(unsigned Opcode, 49 ArrayRef<VPValue *> Operands, DebugLoc DL, 50 const Twine &Name = "") { 51 VPInstruction *Instr = new VPInstruction(Opcode, Operands, DL, Name); 52 if (BB) 53 BB->insert(Instr, InsertPt); 54 return Instr; 55 } 56 57 VPInstruction *createInstruction(unsigned Opcode, 58 std::initializer_list<VPValue *> Operands, 59 DebugLoc DL, const Twine &Name = "") { 60 return createInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name); 61 } 62 63 public: 64 VPBuilder() = default; 65 66 /// Clear the insertion point: created instructions will not be inserted into 67 /// a block. 68 void clearInsertionPoint() { 69 BB = nullptr; 70 InsertPt = VPBasicBlock::iterator(); 71 } 72 73 VPBasicBlock *getInsertBlock() const { return BB; } 74 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; } 75 76 /// InsertPoint - A saved insertion point. 77 class VPInsertPoint { 78 VPBasicBlock *Block = nullptr; 79 VPBasicBlock::iterator Point; 80 81 public: 82 /// Creates a new insertion point which doesn't point to anything. 83 VPInsertPoint() = default; 84 85 /// Creates a new insertion point at the given location. 86 VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint) 87 : Block(InsertBlock), Point(InsertPoint) {} 88 89 /// Returns true if this insert point is set. 90 bool isSet() const { return Block != nullptr; } 91 92 VPBasicBlock *getBlock() const { return Block; } 93 VPBasicBlock::iterator getPoint() const { return Point; } 94 }; 95 96 /// Sets the current insert point to a previously-saved location. 97 void restoreIP(VPInsertPoint IP) { 98 if (IP.isSet()) 99 setInsertPoint(IP.getBlock(), IP.getPoint()); 100 else 101 clearInsertionPoint(); 102 } 103 104 /// This specifies that created VPInstructions should be appended to the end 105 /// of the specified block. 106 void setInsertPoint(VPBasicBlock *TheBB) { 107 assert(TheBB && "Attempting to set a null insert point"); 108 BB = TheBB; 109 InsertPt = BB->end(); 110 } 111 112 /// This specifies that created instructions should be inserted at the 113 /// specified point. 114 void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) { 115 BB = TheBB; 116 InsertPt = IP; 117 } 118 119 /// Insert and return the specified instruction. 120 VPInstruction *insert(VPInstruction *I) const { 121 BB->insert(I, InsertPt); 122 return I; 123 } 124 125 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as 126 /// its underlying Instruction. 127 VPValue *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands, 128 Instruction *Inst = nullptr, const Twine &Name = "") { 129 DebugLoc DL; 130 if (Inst) 131 DL = Inst->getDebugLoc(); 132 VPInstruction *NewVPInst = createInstruction(Opcode, Operands, DL, Name); 133 NewVPInst->setUnderlyingValue(Inst); 134 return NewVPInst; 135 } 136 VPValue *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands, 137 DebugLoc DL, const Twine &Name = "") { 138 return createInstruction(Opcode, Operands, DL, Name); 139 } 140 141 VPValue *createNot(VPValue *Operand, DebugLoc DL, const Twine &Name = "") { 142 return createInstruction(VPInstruction::Not, {Operand}, DL, Name); 143 } 144 145 VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL, 146 const Twine &Name = "") { 147 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name); 148 } 149 150 VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL, 151 const Twine &Name = "") { 152 return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}, DL, Name); 153 } 154 155 VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, 156 DebugLoc DL, const Twine &Name = "") { 157 return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL, 158 Name); 159 } 160 161 //===--------------------------------------------------------------------===// 162 // RAII helpers. 163 //===--------------------------------------------------------------------===// 164 165 /// RAII object that stores the current insertion point and restores it when 166 /// the object is destroyed. 167 class InsertPointGuard { 168 VPBuilder &Builder; 169 VPBasicBlock *Block; 170 VPBasicBlock::iterator Point; 171 172 public: 173 InsertPointGuard(VPBuilder &B) 174 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {} 175 176 InsertPointGuard(const InsertPointGuard &) = delete; 177 InsertPointGuard &operator=(const InsertPointGuard &) = delete; 178 179 ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); } 180 }; 181 }; 182 183 /// TODO: The following VectorizationFactor was pulled out of 184 /// LoopVectorizationCostModel class. LV also deals with 185 /// VectorizerParams::VectorizationFactor and VectorizationCostTy. 186 /// We need to streamline them. 187 188 /// Information about vectorization costs. 189 struct VectorizationFactor { 190 /// Vector width with best cost. 191 ElementCount Width; 192 193 /// Cost of the loop with that width. 194 InstructionCost Cost; 195 196 /// Cost of the scalar loop. 197 InstructionCost ScalarCost; 198 199 /// The minimum trip count required to make vectorization profitable, e.g. due 200 /// to runtime checks. 201 ElementCount MinProfitableTripCount; 202 203 VectorizationFactor(ElementCount Width, InstructionCost Cost, 204 InstructionCost ScalarCost) 205 : Width(Width), Cost(Cost), ScalarCost(ScalarCost) {} 206 207 /// Width 1 means no vectorization, cost 0 means uncomputed cost. 208 static VectorizationFactor Disabled() { 209 return {ElementCount::getFixed(1), 0, 0}; 210 } 211 212 bool operator==(const VectorizationFactor &rhs) const { 213 return Width == rhs.Width && Cost == rhs.Cost; 214 } 215 216 bool operator!=(const VectorizationFactor &rhs) const { 217 return !(*this == rhs); 218 } 219 }; 220 221 /// ElementCountComparator creates a total ordering for ElementCount 222 /// for the purposes of using it in a set structure. 223 struct ElementCountComparator { 224 bool operator()(const ElementCount &LHS, const ElementCount &RHS) const { 225 return std::make_tuple(LHS.isScalable(), LHS.getKnownMinValue()) < 226 std::make_tuple(RHS.isScalable(), RHS.getKnownMinValue()); 227 } 228 }; 229 using ElementCountSet = SmallSet<ElementCount, 16, ElementCountComparator>; 230 231 /// A class that represents two vectorization factors (initialized with 0 by 232 /// default). One for fixed-width vectorization and one for scalable 233 /// vectorization. This can be used by the vectorizer to choose from a range of 234 /// fixed and/or scalable VFs in order to find the most cost-effective VF to 235 /// vectorize with. 236 struct FixedScalableVFPair { 237 ElementCount FixedVF; 238 ElementCount ScalableVF; 239 240 FixedScalableVFPair() 241 : FixedVF(ElementCount::getFixed(0)), 242 ScalableVF(ElementCount::getScalable(0)) {} 243 FixedScalableVFPair(const ElementCount &Max) : FixedScalableVFPair() { 244 *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max; 245 } 246 FixedScalableVFPair(const ElementCount &FixedVF, 247 const ElementCount &ScalableVF) 248 : FixedVF(FixedVF), ScalableVF(ScalableVF) { 249 assert(!FixedVF.isScalable() && ScalableVF.isScalable() && 250 "Invalid scalable properties"); 251 } 252 253 static FixedScalableVFPair getNone() { return FixedScalableVFPair(); } 254 255 /// \return true if either fixed- or scalable VF is non-zero. 256 explicit operator bool() const { return FixedVF || ScalableVF; } 257 258 /// \return true if either fixed- or scalable VF is a valid vector VF. 259 bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); } 260 }; 261 262 /// Planner drives the vectorization process after having passed 263 /// Legality checks. 264 class LoopVectorizationPlanner { 265 /// The loop that we evaluate. 266 Loop *OrigLoop; 267 268 /// Loop Info analysis. 269 LoopInfo *LI; 270 271 /// Target Library Info. 272 const TargetLibraryInfo *TLI; 273 274 /// Target Transform Info. 275 const TargetTransformInfo &TTI; 276 277 /// The legality analysis. 278 LoopVectorizationLegality *Legal; 279 280 /// The profitability analysis. 281 LoopVectorizationCostModel &CM; 282 283 /// The interleaved access analysis. 284 InterleavedAccessInfo &IAI; 285 286 PredicatedScalarEvolution &PSE; 287 288 const LoopVectorizeHints &Hints; 289 290 OptimizationRemarkEmitter *ORE; 291 292 SmallVector<VPlanPtr, 4> VPlans; 293 294 /// Profitable vector factors. 295 SmallVector<VectorizationFactor, 8> ProfitableVFs; 296 297 /// A builder used to construct the current plan. 298 VPBuilder Builder; 299 300 public: 301 LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI, 302 const TargetTransformInfo &TTI, 303 LoopVectorizationLegality *Legal, 304 LoopVectorizationCostModel &CM, 305 InterleavedAccessInfo &IAI, 306 PredicatedScalarEvolution &PSE, 307 const LoopVectorizeHints &Hints, 308 OptimizationRemarkEmitter *ORE) 309 : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), IAI(IAI), 310 PSE(PSE), Hints(Hints), ORE(ORE) {} 311 312 /// Plan how to best vectorize, return the best VF and its cost, or 313 /// std::nullopt if vectorization and interleaving should be avoided up front. 314 std::optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC); 315 316 /// Use the VPlan-native path to plan how to best vectorize, return the best 317 /// VF and its cost. 318 VectorizationFactor planInVPlanNativePath(ElementCount UserVF); 319 320 /// Return the best VPlan for \p VF. 321 VPlan &getBestPlanFor(ElementCount VF) const; 322 323 /// Generate the IR code for the body of the vectorized loop according to the 324 /// best selected \p VF, \p UF and VPlan \p BestPlan. 325 /// TODO: \p IsEpilogueVectorization is needed to avoid issues due to epilogue 326 /// vectorization re-using plans for both the main and epilogue vector loops. 327 /// It should be removed once the re-use issue has been fixed. 328 /// \p ExpandedSCEVs is passed during execution of the plan for epilogue loop 329 /// to re-use expansion results generated during main plan execution. Returns 330 /// a mapping of SCEVs to their expanded IR values. Note that this is a 331 /// temporary workaround needed due to the current epilogue handling. 332 DenseMap<const SCEV *, Value *> 333 executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, 334 InnerLoopVectorizer &LB, DominatorTree *DT, 335 bool IsEpilogueVectorization, 336 DenseMap<const SCEV *, Value *> *ExpandedSCEVs = nullptr); 337 338 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 339 void printPlans(raw_ostream &O); 340 #endif 341 342 /// Look through the existing plans and return true if we have one with 343 /// vectorization factor \p VF. 344 bool hasPlanWithVF(ElementCount VF) const { 345 return any_of(VPlans, 346 [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); }); 347 } 348 349 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying 350 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the 351 /// returned value holds for the entire \p Range. 352 static bool 353 getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate, 354 VFRange &Range); 355 356 /// \return The most profitable vectorization factor and the cost of that VF 357 /// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if 358 /// epilogue vectorization is not supported for the loop. 359 VectorizationFactor 360 selectEpilogueVectorizationFactor(const ElementCount MaxVF, unsigned IC); 361 362 protected: 363 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, 364 /// according to the information gathered by Legal when it checked if it is 365 /// legal to vectorize the loop. 366 void buildVPlans(ElementCount MinVF, ElementCount MaxVF); 367 368 private: 369 /// Build a VPlan according to the information gathered by Legal. \return a 370 /// VPlan for vectorization factors \p Range.Start and up to \p Range.End 371 /// exclusive, possibly decreasing \p Range.End. 372 VPlanPtr buildVPlan(VFRange &Range); 373 374 /// Build a VPlan using VPRecipes according to the information gather by 375 /// Legal. This method is only used for the legacy inner loop vectorizer. 376 /// \p Range's largest included VF is restricted to the maximum VF the 377 /// returned VPlan is valid for. If no VPlan can be built for the input range, 378 /// set the largest included VF to the maximum VF for which no plan could be 379 /// built. 380 std::optional<VPlanPtr> tryToBuildVPlanWithVPRecipes( 381 VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions); 382 383 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, 384 /// according to the information gathered by Legal when it checked if it is 385 /// legal to vectorize the loop. This method creates VPlans using VPRecipes. 386 void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF); 387 388 // Adjust the recipes for reductions. For in-loop reductions the chain of 389 // instructions leading from the loop exit instr to the phi need to be 390 // converted to reductions, with one operand being vector and the other being 391 // the scalar reduction chain. For other reductions, a select is introduced 392 // between the phi and live-out recipes when folding the tail. 393 void adjustRecipesForReductions(VPBasicBlock *LatchVPBB, VPlanPtr &Plan, 394 VPRecipeBuilder &RecipeBuilder, 395 ElementCount MinVF); 396 397 /// \return The most profitable vectorization factor and the cost of that VF. 398 /// This method checks every VF in \p CandidateVFs. 399 VectorizationFactor 400 selectVectorizationFactor(const ElementCountSet &CandidateVFs); 401 402 /// Returns true if the per-lane cost of VectorizationFactor A is lower than 403 /// that of B. 404 bool isMoreProfitable(const VectorizationFactor &A, 405 const VectorizationFactor &B) const; 406 407 /// Determines if we have the infrastructure to vectorize the loop and its 408 /// epilogue, assuming the main loop is vectorized by \p VF. 409 bool isCandidateForEpilogueVectorization(const ElementCount VF) const; 410 }; 411 412 } // namespace llvm 413 414 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H 415