1 //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file defines the LoopVectorizationLegality class. Original code 11 /// in Loop Vectorizer has been moved out to its own file for modularity 12 /// and reusability. 13 /// 14 /// Currently, it works for innermost loop vectorization. Extending this to 15 /// outer loop vectorization is a TODO item. 16 /// 17 /// Also provides: 18 /// 1) LoopVectorizeHints class which keeps a number of loop annotations 19 /// locally for easy look up. It has the ability to write them back as 20 /// loop metadata, upon request. 21 /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose 22 /// of reporting useful failure to vectorize message. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H 27 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H 28 29 #include "llvm/ADT/MapVector.h" 30 #include "llvm/Analysis/LoopAccessAnalysis.h" 31 #include "llvm/Support/TypeSize.h" 32 #include "llvm/Transforms/Utils/LoopUtils.h" 33 34 namespace llvm { 35 class AssumptionCache; 36 class BasicBlock; 37 class BlockFrequencyInfo; 38 class DemandedBits; 39 class DominatorTree; 40 class Function; 41 class Loop; 42 class LoopInfo; 43 class Metadata; 44 class OptimizationRemarkEmitter; 45 class PredicatedScalarEvolution; 46 class ProfileSummaryInfo; 47 class TargetLibraryInfo; 48 class TargetTransformInfo; 49 class Type; 50 51 /// Utility class for getting and setting loop vectorizer hints in the form 52 /// of loop metadata. 53 /// This class keeps a number of loop annotations locally (as member variables) 54 /// and can, upon request, write them back as metadata on the loop. It will 55 /// initially scan the loop for existing metadata, and will update the local 56 /// values based on information in the loop. 57 /// We cannot write all values to metadata, as the mere presence of some info, 58 /// for example 'force', means a decision has been made. So, we need to be 59 /// careful NOT to add them if the user hasn't specifically asked so. 60 class LoopVectorizeHints { 61 enum HintKind { 62 HK_WIDTH, 63 HK_INTERLEAVE, 64 HK_FORCE, 65 HK_ISVECTORIZED, 66 HK_PREDICATE, 67 HK_SCALABLE 68 }; 69 70 /// Hint - associates name and validation with the hint value. 71 struct Hint { 72 const char *Name; 73 unsigned Value; // This may have to change for non-numeric values. 74 HintKind Kind; 75 HintHint76 Hint(const char *Name, unsigned Value, HintKind Kind) 77 : Name(Name), Value(Value), Kind(Kind) {} 78 79 bool validate(unsigned Val); 80 }; 81 82 /// Vectorization width. 83 Hint Width; 84 85 /// Vectorization interleave factor. 86 Hint Interleave; 87 88 /// Vectorization forced 89 Hint Force; 90 91 /// Already Vectorized 92 Hint IsVectorized; 93 94 /// Vector Predicate 95 Hint Predicate; 96 97 /// Says whether we should use fixed width or scalable vectorization. 98 Hint Scalable; 99 100 /// Return the loop metadata prefix. Prefix()101 static StringRef Prefix() { return "llvm.loop."; } 102 103 /// True if there is any unsafe math in the loop. 104 bool PotentiallyUnsafe = false; 105 106 public: 107 enum ForceKind { 108 FK_Undefined = -1, ///< Not selected. 109 FK_Disabled = 0, ///< Forcing disabled. 110 FK_Enabled = 1, ///< Forcing enabled. 111 }; 112 113 enum ScalableForceKind { 114 /// Not selected. 115 SK_Unspecified = -1, 116 /// Disables vectorization with scalable vectors. 117 SK_FixedWidthOnly = 0, 118 /// Vectorize loops using scalable vectors or fixed-width vectors, but favor 119 /// scalable vectors when the cost-model is inconclusive. This is the 120 /// default when the scalable.enable hint is enabled through a pragma. 121 SK_PreferScalable = 1 122 }; 123 124 LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, 125 OptimizationRemarkEmitter &ORE, 126 const TargetTransformInfo *TTI = nullptr); 127 128 /// Mark the loop L as already vectorized by setting the width to 1. 129 void setAlreadyVectorized(); 130 131 bool allowVectorization(Function *F, Loop *L, 132 bool VectorizeOnlyWhenForced) const; 133 134 /// Dumps all the hint information. 135 void emitRemarkWithHints() const; 136 getWidth()137 ElementCount getWidth() const { 138 return ElementCount::get(Width.Value, (ScalableForceKind)Scalable.Value == 139 SK_PreferScalable); 140 } 141 getInterleave()142 unsigned getInterleave() const { 143 if (Interleave.Value) 144 return Interleave.Value; 145 // If interleaving is not explicitly set, assume that if we do not want 146 // unrolling, we also don't want any interleaving. 147 if (llvm::hasUnrollTransformation(TheLoop) & TM_Disable) 148 return 1; 149 return 0; 150 } getIsVectorized()151 unsigned getIsVectorized() const { return IsVectorized.Value; } getPredicate()152 unsigned getPredicate() const { return Predicate.Value; } getForce()153 enum ForceKind getForce() const { 154 if ((ForceKind)Force.Value == FK_Undefined && 155 hasDisableAllTransformsHint(TheLoop)) 156 return FK_Disabled; 157 return (ForceKind)Force.Value; 158 } 159 160 /// \return true if scalable vectorization has been explicitly disabled. isScalableVectorizationDisabled()161 bool isScalableVectorizationDisabled() const { 162 return (ScalableForceKind)Scalable.Value == SK_FixedWidthOnly; 163 } 164 165 /// If hints are provided that force vectorization, use the AlwaysPrint 166 /// pass name to force the frontend to print the diagnostic. 167 const char *vectorizeAnalysisPassName() const; 168 169 /// When enabling loop hints are provided we allow the vectorizer to change 170 /// the order of operations that is given by the scalar loop. This is not 171 /// enabled by default because can be unsafe or inefficient. For example, 172 /// reordering floating-point operations will change the way round-off 173 /// error accumulates in the loop. 174 bool allowReordering() const; 175 isPotentiallyUnsafe()176 bool isPotentiallyUnsafe() const { 177 // Avoid FP vectorization if the target is unsure about proper support. 178 // This may be related to the SIMD unit in the target not handling 179 // IEEE 754 FP ops properly, or bad single-to-double promotions. 180 // Otherwise, a sequence of vectorized loops, even without reduction, 181 // could lead to different end results on the destination vectors. 182 return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe; 183 } 184 setPotentiallyUnsafe()185 void setPotentiallyUnsafe() { PotentiallyUnsafe = true; } 186 187 private: 188 /// Find hints specified in the loop metadata and update local values. 189 void getHintsFromMetadata(); 190 191 /// Checks string hint with one operand and set value if valid. 192 void setHint(StringRef Name, Metadata *Arg); 193 194 /// The loop these hints belong to. 195 const Loop *TheLoop; 196 197 /// Interface to emit optimization remarks. 198 OptimizationRemarkEmitter &ORE; 199 }; 200 201 /// This holds vectorization requirements that must be verified late in 202 /// the process. The requirements are set by legalize and costmodel. Once 203 /// vectorization has been determined to be possible and profitable the 204 /// requirements can be verified by looking for metadata or compiler options. 205 /// For example, some loops require FP commutativity which is only allowed if 206 /// vectorization is explicitly specified or if the fast-math compiler option 207 /// has been provided. 208 /// Late evaluation of these requirements allows helpful diagnostics to be 209 /// composed that tells the user what need to be done to vectorize the loop. For 210 /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late 211 /// evaluation should be used only when diagnostics can generated that can be 212 /// followed by a non-expert user. 213 class LoopVectorizationRequirements { 214 public: 215 /// Track the 1st floating-point instruction that can not be reassociated. addExactFPMathInst(Instruction * I)216 void addExactFPMathInst(Instruction *I) { 217 if (I && !ExactFPMathInst) 218 ExactFPMathInst = I; 219 } 220 getExactFPInst()221 Instruction *getExactFPInst() { return ExactFPMathInst; } 222 223 private: 224 Instruction *ExactFPMathInst = nullptr; 225 }; 226 227 /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and 228 /// to what vectorization factor. 229 /// This class does not look at the profitability of vectorization, only the 230 /// legality. This class has two main kinds of checks: 231 /// * Memory checks - The code in canVectorizeMemory checks if vectorization 232 /// will change the order of memory accesses in a way that will change the 233 /// correctness of the program. 234 /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory 235 /// checks for a number of different conditions, such as the availability of a 236 /// single induction variable, that all types are supported and vectorize-able, 237 /// etc. This code reflects the capabilities of InnerLoopVectorizer. 238 /// This class is also used by InnerLoopVectorizer for identifying 239 /// induction variable and the different reduction variables. 240 class LoopVectorizationLegality { 241 public: LoopVectorizationLegality(Loop * L,PredicatedScalarEvolution & PSE,DominatorTree * DT,TargetTransformInfo * TTI,TargetLibraryInfo * TLI,Function * F,LoopAccessInfoManager & LAIs,LoopInfo * LI,OptimizationRemarkEmitter * ORE,LoopVectorizationRequirements * R,LoopVectorizeHints * H,DemandedBits * DB,AssumptionCache * AC,BlockFrequencyInfo * BFI,ProfileSummaryInfo * PSI)242 LoopVectorizationLegality( 243 Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, 244 TargetTransformInfo *TTI, TargetLibraryInfo *TLI, Function *F, 245 LoopAccessInfoManager &LAIs, LoopInfo *LI, OptimizationRemarkEmitter *ORE, 246 LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB, 247 AssumptionCache *AC, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI) 248 : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT), LAIs(LAIs), 249 ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), BFI(BFI), 250 PSI(PSI) {} 251 252 /// ReductionList contains the reduction descriptors for all 253 /// of the reductions that were found in the loop. 254 using ReductionList = MapVector<PHINode *, RecurrenceDescriptor>; 255 256 /// InductionList saves induction variables and maps them to the 257 /// induction descriptor. 258 using InductionList = MapVector<PHINode *, InductionDescriptor>; 259 260 /// RecurrenceSet contains the phi nodes that are recurrences other than 261 /// inductions and reductions. 262 using RecurrenceSet = SmallPtrSet<const PHINode *, 8>; 263 264 /// Returns true if it is legal to vectorize this loop. 265 /// This does not mean that it is profitable to vectorize this 266 /// loop, only that it is legal to do so. 267 /// Temporarily taking UseVPlanNativePath parameter. If true, take 268 /// the new code path being implemented for outer loop vectorization 269 /// (should be functional for inner loop vectorization) based on VPlan. 270 /// If false, good old LV code. 271 bool canVectorize(bool UseVPlanNativePath); 272 273 /// Returns true if it is legal to vectorize the FP math operations in this 274 /// loop. Vectorizing is legal if we allow reordering of FP operations, or if 275 /// we can use in-order reductions. 276 bool canVectorizeFPMath(bool EnableStrictReductions); 277 278 /// Return true if we can vectorize this loop while folding its tail by 279 /// masking, and mark all respective loads/stores for masking. 280 /// This object's state is only modified iff this function returns true. 281 bool prepareToFoldTailByMasking(); 282 283 /// Returns the primary induction variable. getPrimaryInduction()284 PHINode *getPrimaryInduction() { return PrimaryInduction; } 285 286 /// Returns the reduction variables found in the loop. getReductionVars()287 const ReductionList &getReductionVars() const { return Reductions; } 288 289 /// Returns the induction variables found in the loop. getInductionVars()290 const InductionList &getInductionVars() const { return Inductions; } 291 292 /// Return the fixed-order recurrences found in the loop. getFixedOrderRecurrences()293 RecurrenceSet &getFixedOrderRecurrences() { return FixedOrderRecurrences; } 294 295 /// Returns the widest induction type. getWidestInductionType()296 Type *getWidestInductionType() { return WidestIndTy; } 297 298 /// Returns True if given store is a final invariant store of one of the 299 /// reductions found in the loop. 300 bool isInvariantStoreOfReduction(StoreInst *SI); 301 302 /// Returns True if given address is invariant and is used to store recurrent 303 /// expression 304 bool isInvariantAddressOfReduction(Value *V); 305 306 /// Returns True if V is a Phi node of an induction variable in this loop. 307 bool isInductionPhi(const Value *V) const; 308 309 /// Returns a pointer to the induction descriptor, if \p Phi is an integer or 310 /// floating point induction. 311 const InductionDescriptor *getIntOrFpInductionDescriptor(PHINode *Phi) const; 312 313 /// Returns a pointer to the induction descriptor, if \p Phi is pointer 314 /// induction. 315 const InductionDescriptor *getPointerInductionDescriptor(PHINode *Phi) const; 316 317 /// Returns True if V is a cast that is part of an induction def-use chain, 318 /// and had been proven to be redundant under a runtime guard (in other 319 /// words, the cast has the same SCEV expression as the induction phi). 320 bool isCastedInductionVariable(const Value *V) const; 321 322 /// Returns True if V can be considered as an induction variable in this 323 /// loop. V can be the induction phi, or some redundant cast in the def-use 324 /// chain of the inducion phi. 325 bool isInductionVariable(const Value *V) const; 326 327 /// Returns True if PN is a reduction variable in this loop. isReductionVariable(PHINode * PN)328 bool isReductionVariable(PHINode *PN) const { return Reductions.count(PN); } 329 330 /// Returns True if Phi is a fixed-order recurrence in this loop. 331 bool isFixedOrderRecurrence(const PHINode *Phi) const; 332 333 /// Return true if the block BB needs to be predicated in order for the loop 334 /// to be vectorized. 335 bool blockNeedsPredication(BasicBlock *BB) const; 336 337 /// Check if this pointer is consecutive when vectorizing. This happens 338 /// when the last index of the GEP is the induction variable, or that the 339 /// pointer itself is an induction variable. 340 /// This check allows us to vectorize A[idx] into a wide load/store. 341 /// Returns: 342 /// 0 - Stride is unknown or non-consecutive. 343 /// 1 - Address is consecutive. 344 /// -1 - Address is consecutive, and decreasing. 345 /// NOTE: This method must only be used before modifying the original scalar 346 /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965). 347 int isConsecutivePtr(Type *AccessTy, Value *Ptr) const; 348 349 /// Returns true if value V is uniform across \p VF lanes, when \p VF is 350 /// provided, and otherwise if \p V is invariant across all loop iterations. 351 bool isInvariant(Value *V) const; 352 353 /// Returns true if value V is uniform across \p VF lanes, when \p VF is 354 /// provided, and otherwise if \p V is invariant across all loop iterations. 355 bool isUniform(Value *V, ElementCount VF) const; 356 357 /// A uniform memory op is a load or store which accesses the same memory 358 /// location on all \p VF lanes, if \p VF is provided and otherwise if the 359 /// memory location is invariant. 360 bool isUniformMemOp(Instruction &I, ElementCount VF) const; 361 362 /// Returns the information that we collected about runtime memory check. getRuntimePointerChecking()363 const RuntimePointerChecking *getRuntimePointerChecking() const { 364 return LAI->getRuntimePointerChecking(); 365 } 366 getLAI()367 const LoopAccessInfo *getLAI() const { return LAI; } 368 isSafeForAnyVectorWidth()369 bool isSafeForAnyVectorWidth() const { 370 return LAI->getDepChecker().isSafeForAnyVectorWidth(); 371 } 372 getMaxSafeVectorWidthInBits()373 uint64_t getMaxSafeVectorWidthInBits() const { 374 return LAI->getDepChecker().getMaxSafeVectorWidthInBits(); 375 } 376 377 /// Returns true if vector representation of the instruction \p I 378 /// requires mask. isMaskRequired(const Instruction * I)379 bool isMaskRequired(const Instruction *I) const { 380 return MaskedOp.contains(I); 381 } 382 383 /// Returns true if there is at least one function call in the loop which 384 /// has a vectorized variant available. hasVectorCallVariants()385 bool hasVectorCallVariants() const { return VecCallVariantsFound; } 386 getNumStores()387 unsigned getNumStores() const { return LAI->getNumStores(); } getNumLoads()388 unsigned getNumLoads() const { return LAI->getNumLoads(); } 389 getPredicatedScalarEvolution()390 PredicatedScalarEvolution *getPredicatedScalarEvolution() const { 391 return &PSE; 392 } 393 getLoop()394 Loop *getLoop() const { return TheLoop; } 395 getLoopInfo()396 LoopInfo *getLoopInfo() const { return LI; } 397 getAssumptionCache()398 AssumptionCache *getAssumptionCache() const { return AC; } 399 getScalarEvolution()400 ScalarEvolution *getScalarEvolution() const { return PSE.getSE(); } 401 getDominatorTree()402 DominatorTree *getDominatorTree() const { return DT; } 403 404 private: 405 /// Return true if the pre-header, exiting and latch blocks of \p Lp and all 406 /// its nested loops are considered legal for vectorization. These legal 407 /// checks are common for inner and outer loop vectorization. 408 /// Temporarily taking UseVPlanNativePath parameter. If true, take 409 /// the new code path being implemented for outer loop vectorization 410 /// (should be functional for inner loop vectorization) based on VPlan. 411 /// If false, good old LV code. 412 bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath); 413 414 /// Set up outer loop inductions by checking Phis in outer loop header for 415 /// supported inductions (int inductions). Return false if any of these Phis 416 /// is not a supported induction or if we fail to find an induction. 417 bool setupOuterLoopInductions(); 418 419 /// Return true if the pre-header, exiting and latch blocks of \p Lp 420 /// (non-recursive) are considered legal for vectorization. 421 /// Temporarily taking UseVPlanNativePath parameter. If true, take 422 /// the new code path being implemented for outer loop vectorization 423 /// (should be functional for inner loop vectorization) based on VPlan. 424 /// If false, good old LV code. 425 bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath); 426 427 /// Check if a single basic block loop is vectorizable. 428 /// At this point we know that this is a loop with a constant trip count 429 /// and we only need to check individual instructions. 430 bool canVectorizeInstrs(); 431 432 /// When we vectorize loops we may change the order in which 433 /// we read and write from memory. This method checks if it is 434 /// legal to vectorize the code, considering only memory constrains. 435 /// Returns true if the loop is vectorizable 436 bool canVectorizeMemory(); 437 438 /// Return true if we can vectorize this loop using the IF-conversion 439 /// transformation. 440 bool canVectorizeWithIfConvert(); 441 442 /// Return true if we can vectorize this outer loop. The method performs 443 /// specific checks for outer loop vectorization. 444 bool canVectorizeOuterLoop(); 445 446 /// Return true if all of the instructions in the block can be speculatively 447 /// executed, and record the loads/stores that require masking. 448 /// \p SafePtrs is a list of addresses that are known to be legal and we know 449 /// that we can read from them without segfault. 450 /// \p MaskedOp is a list of instructions that have to be transformed into 451 /// calls to the appropriate masked intrinsic when the loop is vectorized 452 /// or dropped if the instruction is a conditional assume intrinsic. 453 bool 454 blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, 455 SmallPtrSetImpl<const Instruction *> &MaskedOp) const; 456 457 /// Updates the vectorization state by adding \p Phi to the inductions list. 458 /// This can set \p Phi as the main induction of the loop if \p Phi is a 459 /// better choice for the main induction than the existing one. 460 void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID, 461 SmallPtrSetImpl<Value *> &AllowedExit); 462 463 /// The loop that we evaluate. 464 Loop *TheLoop; 465 466 /// Loop Info analysis. 467 LoopInfo *LI; 468 469 /// A wrapper around ScalarEvolution used to add runtime SCEV checks. 470 /// Applies dynamic knowledge to simplify SCEV expressions in the context 471 /// of existing SCEV assumptions. The analysis will also add a minimal set 472 /// of new predicates if this is required to enable vectorization and 473 /// unrolling. 474 PredicatedScalarEvolution &PSE; 475 476 /// Target Transform Info. 477 TargetTransformInfo *TTI; 478 479 /// Target Library Info. 480 TargetLibraryInfo *TLI; 481 482 /// Dominator Tree. 483 DominatorTree *DT; 484 485 // LoopAccess analysis. 486 LoopAccessInfoManager &LAIs; 487 488 const LoopAccessInfo *LAI = nullptr; 489 490 /// Interface to emit optimization remarks. 491 OptimizationRemarkEmitter *ORE; 492 493 // --- vectorization state --- // 494 495 /// Holds the primary induction variable. This is the counter of the 496 /// loop. 497 PHINode *PrimaryInduction = nullptr; 498 499 /// Holds the reduction variables. 500 ReductionList Reductions; 501 502 /// Holds all of the induction variables that we found in the loop. 503 /// Notice that inductions don't need to start at zero and that induction 504 /// variables can be pointers. 505 InductionList Inductions; 506 507 /// Holds all the casts that participate in the update chain of the induction 508 /// variables, and that have been proven to be redundant (possibly under a 509 /// runtime guard). These casts can be ignored when creating the vectorized 510 /// loop body. 511 SmallPtrSet<Instruction *, 4> InductionCastsToIgnore; 512 513 /// Holds the phi nodes that are fixed-order recurrences. 514 RecurrenceSet FixedOrderRecurrences; 515 516 /// Holds the widest induction type encountered. 517 Type *WidestIndTy = nullptr; 518 519 /// Allowed outside users. This holds the variables that can be accessed from 520 /// outside the loop. 521 SmallPtrSet<Value *, 4> AllowedExit; 522 523 /// Vectorization requirements that will go through late-evaluation. 524 LoopVectorizationRequirements *Requirements; 525 526 /// Used to emit an analysis of any legality issues. 527 LoopVectorizeHints *Hints; 528 529 /// The demanded bits analysis is used to compute the minimum type size in 530 /// which a reduction can be computed. 531 DemandedBits *DB; 532 533 /// The assumption cache analysis is used to compute the minimum type size in 534 /// which a reduction can be computed. 535 AssumptionCache *AC; 536 537 /// While vectorizing these instructions we have to generate a 538 /// call to the appropriate masked intrinsic or drop them in case of 539 /// conditional assumes. 540 SmallPtrSet<const Instruction *, 8> MaskedOp; 541 542 /// BFI and PSI are used to check for profile guided size optimizations. 543 BlockFrequencyInfo *BFI; 544 ProfileSummaryInfo *PSI; 545 546 /// If we discover function calls within the loop which have a valid 547 /// vectorized variant, record that fact so that LoopVectorize can 548 /// (potentially) make a better decision on the maximum VF and enable 549 /// the use of those function variants. 550 bool VecCallVariantsFound = false; 551 }; 552 553 } // namespace llvm 554 555 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H 556