1 //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file defines the LoopVectorizationLegality class. Original code 11 /// in Loop Vectorizer has been moved out to its own file for modularity 12 /// and reusability. 13 /// 14 /// Currently, it works for innermost loop vectorization. Extending this to 15 /// outer loop vectorization is a TODO item. 16 /// 17 /// Also provides: 18 /// 1) LoopVectorizeHints class which keeps a number of loop annotations 19 /// locally for easy look up. It has the ability to write them back as 20 /// loop metadata, upon request. 21 /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose 22 /// of reporting useful failure to vectorize message. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H 27 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H 28 29 #include "llvm/ADT/MapVector.h" 30 #include "llvm/Analysis/LoopAccessAnalysis.h" 31 #include "llvm/Support/TypeSize.h" 32 #include "llvm/Transforms/Utils/LoopUtils.h" 33 34 namespace llvm { 35 class AAResults; 36 class AssumptionCache; 37 class BasicBlock; 38 class BlockFrequencyInfo; 39 class DemandedBits; 40 class DominatorTree; 41 class Function; 42 class Loop; 43 class LoopInfo; 44 class Metadata; 45 class OptimizationRemarkEmitter; 46 class PredicatedScalarEvolution; 47 class ProfileSummaryInfo; 48 class TargetLibraryInfo; 49 class TargetTransformInfo; 50 class Type; 51 52 /// Utility class for getting and setting loop vectorizer hints in the form 53 /// of loop metadata. 54 /// This class keeps a number of loop annotations locally (as member variables) 55 /// and can, upon request, write them back as metadata on the loop. It will 56 /// initially scan the loop for existing metadata, and will update the local 57 /// values based on information in the loop. 58 /// We cannot write all values to metadata, as the mere presence of some info, 59 /// for example 'force', means a decision has been made. So, we need to be 60 /// careful NOT to add them if the user hasn't specifically asked so. 61 class LoopVectorizeHints { 62 enum HintKind { 63 HK_WIDTH, 64 HK_INTERLEAVE, 65 HK_FORCE, 66 HK_ISVECTORIZED, 67 HK_PREDICATE, 68 HK_SCALABLE 69 }; 70 71 /// Hint - associates name and validation with the hint value. 72 struct Hint { 73 const char *Name; 74 unsigned Value; // This may have to change for non-numeric values. 75 HintKind Kind; 76 HintHint77 Hint(const char *Name, unsigned Value, HintKind Kind) 78 : Name(Name), Value(Value), Kind(Kind) {} 79 80 bool validate(unsigned Val); 81 }; 82 83 /// Vectorization width. 84 Hint Width; 85 86 /// Vectorization interleave factor. 87 Hint Interleave; 88 89 /// Vectorization forced 90 Hint Force; 91 92 /// Already Vectorized 93 Hint IsVectorized; 94 95 /// Vector Predicate 96 Hint Predicate; 97 98 /// Says whether we should use fixed width or scalable vectorization. 99 Hint Scalable; 100 101 /// Return the loop metadata prefix. Prefix()102 static StringRef Prefix() { return "llvm.loop."; } 103 104 /// True if there is any unsafe math in the loop. 105 bool PotentiallyUnsafe = false; 106 107 public: 108 enum ForceKind { 109 FK_Undefined = -1, ///< Not selected. 110 FK_Disabled = 0, ///< Forcing disabled. 111 FK_Enabled = 1, ///< Forcing enabled. 112 }; 113 114 enum ScalableForceKind { 115 /// Not selected. 116 SK_Unspecified = -1, 117 /// Disables vectorization with scalable vectors. 118 SK_FixedWidthOnly = 0, 119 /// Vectorize loops using scalable vectors or fixed-width vectors, but favor 120 /// scalable vectors when the cost-model is inconclusive. This is the 121 /// default when the scalable.enable hint is enabled through a pragma. 122 SK_PreferScalable = 1 123 }; 124 125 LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, 126 OptimizationRemarkEmitter &ORE, 127 const TargetTransformInfo *TTI = nullptr); 128 129 /// Mark the loop L as already vectorized by setting the width to 1. 130 void setAlreadyVectorized(); 131 132 bool allowVectorization(Function *F, Loop *L, 133 bool VectorizeOnlyWhenForced) const; 134 135 /// Dumps all the hint information. 136 void emitRemarkWithHints() const; 137 getWidth()138 ElementCount getWidth() const { 139 return ElementCount::get(Width.Value, (ScalableForceKind)Scalable.Value == 140 SK_PreferScalable); 141 } 142 getInterleave()143 unsigned getInterleave() const { 144 if (Interleave.Value) 145 return Interleave.Value; 146 // If interleaving is not explicitly set, assume that if we do not want 147 // unrolling, we also don't want any interleaving. 148 if (llvm::hasUnrollTransformation(TheLoop) & TM_Disable) 149 return 1; 150 return 0; 151 } getIsVectorized()152 unsigned getIsVectorized() const { return IsVectorized.Value; } getPredicate()153 unsigned getPredicate() const { return Predicate.Value; } getForce()154 enum ForceKind getForce() const { 155 if ((ForceKind)Force.Value == FK_Undefined && 156 hasDisableAllTransformsHint(TheLoop)) 157 return FK_Disabled; 158 return (ForceKind)Force.Value; 159 } 160 161 /// \return true if scalable vectorization has been explicitly disabled. isScalableVectorizationDisabled()162 bool isScalableVectorizationDisabled() const { 163 return (ScalableForceKind)Scalable.Value == SK_FixedWidthOnly; 164 } 165 166 /// If hints are provided that force vectorization, use the AlwaysPrint 167 /// pass name to force the frontend to print the diagnostic. 168 const char *vectorizeAnalysisPassName() const; 169 170 /// When enabling loop hints are provided we allow the vectorizer to change 171 /// the order of operations that is given by the scalar loop. This is not 172 /// enabled by default because can be unsafe or inefficient. For example, 173 /// reordering floating-point operations will change the way round-off 174 /// error accumulates in the loop. 175 bool allowReordering() const; 176 isPotentiallyUnsafe()177 bool isPotentiallyUnsafe() const { 178 // Avoid FP vectorization if the target is unsure about proper support. 179 // This may be related to the SIMD unit in the target not handling 180 // IEEE 754 FP ops properly, or bad single-to-double promotions. 181 // Otherwise, a sequence of vectorized loops, even without reduction, 182 // could lead to different end results on the destination vectors. 183 return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe; 184 } 185 setPotentiallyUnsafe()186 void setPotentiallyUnsafe() { PotentiallyUnsafe = true; } 187 188 private: 189 /// Find hints specified in the loop metadata and update local values. 190 void getHintsFromMetadata(); 191 192 /// Checks string hint with one operand and set value if valid. 193 void setHint(StringRef Name, Metadata *Arg); 194 195 /// The loop these hints belong to. 196 const Loop *TheLoop; 197 198 /// Interface to emit optimization remarks. 199 OptimizationRemarkEmitter &ORE; 200 }; 201 202 /// This holds vectorization requirements that must be verified late in 203 /// the process. The requirements are set by legalize and costmodel. Once 204 /// vectorization has been determined to be possible and profitable the 205 /// requirements can be verified by looking for metadata or compiler options. 206 /// For example, some loops require FP commutativity which is only allowed if 207 /// vectorization is explicitly specified or if the fast-math compiler option 208 /// has been provided. 209 /// Late evaluation of these requirements allows helpful diagnostics to be 210 /// composed that tells the user what need to be done to vectorize the loop. For 211 /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late 212 /// evaluation should be used only when diagnostics can generated that can be 213 /// followed by a non-expert user. 214 class LoopVectorizationRequirements { 215 public: 216 /// Track the 1st floating-point instruction that can not be reassociated. addExactFPMathInst(Instruction * I)217 void addExactFPMathInst(Instruction *I) { 218 if (I && !ExactFPMathInst) 219 ExactFPMathInst = I; 220 } 221 getExactFPInst()222 Instruction *getExactFPInst() { return ExactFPMathInst; } 223 224 private: 225 Instruction *ExactFPMathInst = nullptr; 226 }; 227 228 /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and 229 /// to what vectorization factor. 230 /// This class does not look at the profitability of vectorization, only the 231 /// legality. This class has two main kinds of checks: 232 /// * Memory checks - The code in canVectorizeMemory checks if vectorization 233 /// will change the order of memory accesses in a way that will change the 234 /// correctness of the program. 235 /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory 236 /// checks for a number of different conditions, such as the availability of a 237 /// single induction variable, that all types are supported and vectorize-able, 238 /// etc. This code reflects the capabilities of InnerLoopVectorizer. 239 /// This class is also used by InnerLoopVectorizer for identifying 240 /// induction variable and the different reduction variables. 241 class LoopVectorizationLegality { 242 public: LoopVectorizationLegality(Loop * L,PredicatedScalarEvolution & PSE,DominatorTree * DT,TargetTransformInfo * TTI,TargetLibraryInfo * TLI,Function * F,LoopAccessInfoManager & LAIs,LoopInfo * LI,OptimizationRemarkEmitter * ORE,LoopVectorizationRequirements * R,LoopVectorizeHints * H,DemandedBits * DB,AssumptionCache * AC,BlockFrequencyInfo * BFI,ProfileSummaryInfo * PSI)243 LoopVectorizationLegality( 244 Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, 245 TargetTransformInfo *TTI, TargetLibraryInfo *TLI, Function *F, 246 LoopAccessInfoManager &LAIs, LoopInfo *LI, OptimizationRemarkEmitter *ORE, 247 LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB, 248 AssumptionCache *AC, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI) 249 : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT), LAIs(LAIs), 250 ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), BFI(BFI), 251 PSI(PSI) {} 252 253 /// ReductionList contains the reduction descriptors for all 254 /// of the reductions that were found in the loop. 255 using ReductionList = MapVector<PHINode *, RecurrenceDescriptor>; 256 257 /// InductionList saves induction variables and maps them to the 258 /// induction descriptor. 259 using InductionList = MapVector<PHINode *, InductionDescriptor>; 260 261 /// RecurrenceSet contains the phi nodes that are recurrences other than 262 /// inductions and reductions. 263 using RecurrenceSet = SmallPtrSet<const PHINode *, 8>; 264 265 /// Returns true if it is legal to vectorize this loop. 266 /// This does not mean that it is profitable to vectorize this 267 /// loop, only that it is legal to do so. 268 /// Temporarily taking UseVPlanNativePath parameter. If true, take 269 /// the new code path being implemented for outer loop vectorization 270 /// (should be functional for inner loop vectorization) based on VPlan. 271 /// If false, good old LV code. 272 bool canVectorize(bool UseVPlanNativePath); 273 274 /// Returns true if it is legal to vectorize the FP math operations in this 275 /// loop. Vectorizing is legal if we allow reordering of FP operations, or if 276 /// we can use in-order reductions. 277 bool canVectorizeFPMath(bool EnableStrictReductions); 278 279 /// Return true if we can vectorize this loop while folding its tail by 280 /// masking, and mark all respective loads/stores for masking. 281 /// This object's state is only modified iff this function returns true. 282 bool prepareToFoldTailByMasking(); 283 284 /// Returns the primary induction variable. getPrimaryInduction()285 PHINode *getPrimaryInduction() { return PrimaryInduction; } 286 287 /// Returns the reduction variables found in the loop. getReductionVars()288 const ReductionList &getReductionVars() const { return Reductions; } 289 290 /// Returns the induction variables found in the loop. getInductionVars()291 const InductionList &getInductionVars() const { return Inductions; } 292 293 /// Return the fixed-order recurrences found in the loop. getFixedOrderRecurrences()294 RecurrenceSet &getFixedOrderRecurrences() { return FixedOrderRecurrences; } 295 296 /// Return the set of instructions to sink to handle fixed-order recurrences. getSinkAfter()297 MapVector<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; } 298 299 /// Returns the widest induction type. getWidestInductionType()300 Type *getWidestInductionType() { return WidestIndTy; } 301 302 /// Returns True if given store is a final invariant store of one of the 303 /// reductions found in the loop. 304 bool isInvariantStoreOfReduction(StoreInst *SI); 305 306 /// Returns True if given address is invariant and is used to store recurrent 307 /// expression 308 bool isInvariantAddressOfReduction(Value *V); 309 310 /// Returns True if V is a Phi node of an induction variable in this loop. 311 bool isInductionPhi(const Value *V) const; 312 313 /// Returns a pointer to the induction descriptor, if \p Phi is an integer or 314 /// floating point induction. 315 const InductionDescriptor *getIntOrFpInductionDescriptor(PHINode *Phi) const; 316 317 /// Returns a pointer to the induction descriptor, if \p Phi is pointer 318 /// induction. 319 const InductionDescriptor *getPointerInductionDescriptor(PHINode *Phi) const; 320 321 /// Returns True if V is a cast that is part of an induction def-use chain, 322 /// and had been proven to be redundant under a runtime guard (in other 323 /// words, the cast has the same SCEV expression as the induction phi). 324 bool isCastedInductionVariable(const Value *V) const; 325 326 /// Returns True if V can be considered as an induction variable in this 327 /// loop. V can be the induction phi, or some redundant cast in the def-use 328 /// chain of the inducion phi. 329 bool isInductionVariable(const Value *V) const; 330 331 /// Returns True if PN is a reduction variable in this loop. isReductionVariable(PHINode * PN)332 bool isReductionVariable(PHINode *PN) const { return Reductions.count(PN); } 333 334 /// Returns True if Phi is a fixed-order recurrence in this loop. 335 bool isFixedOrderRecurrence(const PHINode *Phi) const; 336 337 /// Return true if the block BB needs to be predicated in order for the loop 338 /// to be vectorized. 339 bool blockNeedsPredication(BasicBlock *BB) const; 340 341 /// Check if this pointer is consecutive when vectorizing. This happens 342 /// when the last index of the GEP is the induction variable, or that the 343 /// pointer itself is an induction variable. 344 /// This check allows us to vectorize A[idx] into a wide load/store. 345 /// Returns: 346 /// 0 - Stride is unknown or non-consecutive. 347 /// 1 - Address is consecutive. 348 /// -1 - Address is consecutive, and decreasing. 349 /// NOTE: This method must only be used before modifying the original scalar 350 /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965). 351 int isConsecutivePtr(Type *AccessTy, Value *Ptr) const; 352 353 /// Returns true if the value V is uniform within the loop. 354 bool isUniform(Value *V) const; 355 356 /// A uniform memory op is a load or store which accesses the same memory 357 /// location on all lanes. 358 bool isUniformMemOp(Instruction &I) const; 359 360 /// Returns the information that we collected about runtime memory check. getRuntimePointerChecking()361 const RuntimePointerChecking *getRuntimePointerChecking() const { 362 return LAI->getRuntimePointerChecking(); 363 } 364 getLAI()365 const LoopAccessInfo *getLAI() const { return LAI; } 366 isSafeForAnyVectorWidth()367 bool isSafeForAnyVectorWidth() const { 368 return LAI->getDepChecker().isSafeForAnyVectorWidth(); 369 } 370 getMaxSafeDepDistBytes()371 unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } 372 getMaxSafeVectorWidthInBits()373 uint64_t getMaxSafeVectorWidthInBits() const { 374 return LAI->getDepChecker().getMaxSafeVectorWidthInBits(); 375 } 376 hasStride(Value * V)377 bool hasStride(Value *V) { return LAI->hasStride(V); } 378 379 /// Returns true if vector representation of the instruction \p I 380 /// requires mask. isMaskRequired(const Instruction * I)381 bool isMaskRequired(const Instruction *I) const { 382 return MaskedOp.contains(I); 383 } 384 getNumStores()385 unsigned getNumStores() const { return LAI->getNumStores(); } getNumLoads()386 unsigned getNumLoads() const { return LAI->getNumLoads(); } 387 388 /// Returns all assume calls in predicated blocks. They need to be dropped 389 /// when flattening the CFG. getConditionalAssumes()390 const SmallPtrSetImpl<Instruction *> &getConditionalAssumes() const { 391 return ConditionalAssumes; 392 } 393 394 private: 395 /// Return true if the pre-header, exiting and latch blocks of \p Lp and all 396 /// its nested loops are considered legal for vectorization. These legal 397 /// checks are common for inner and outer loop vectorization. 398 /// Temporarily taking UseVPlanNativePath parameter. If true, take 399 /// the new code path being implemented for outer loop vectorization 400 /// (should be functional for inner loop vectorization) based on VPlan. 401 /// If false, good old LV code. 402 bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath); 403 404 /// Set up outer loop inductions by checking Phis in outer loop header for 405 /// supported inductions (int inductions). Return false if any of these Phis 406 /// is not a supported induction or if we fail to find an induction. 407 bool setupOuterLoopInductions(); 408 409 /// Return true if the pre-header, exiting and latch blocks of \p Lp 410 /// (non-recursive) are considered legal for vectorization. 411 /// Temporarily taking UseVPlanNativePath parameter. If true, take 412 /// the new code path being implemented for outer loop vectorization 413 /// (should be functional for inner loop vectorization) based on VPlan. 414 /// If false, good old LV code. 415 bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath); 416 417 /// Check if a single basic block loop is vectorizable. 418 /// At this point we know that this is a loop with a constant trip count 419 /// and we only need to check individual instructions. 420 bool canVectorizeInstrs(); 421 422 /// When we vectorize loops we may change the order in which 423 /// we read and write from memory. This method checks if it is 424 /// legal to vectorize the code, considering only memory constrains. 425 /// Returns true if the loop is vectorizable 426 bool canVectorizeMemory(); 427 428 /// Return true if we can vectorize this loop using the IF-conversion 429 /// transformation. 430 bool canVectorizeWithIfConvert(); 431 432 /// Return true if we can vectorize this outer loop. The method performs 433 /// specific checks for outer loop vectorization. 434 bool canVectorizeOuterLoop(); 435 436 /// Return true if all of the instructions in the block can be speculatively 437 /// executed, and record the loads/stores that require masking. 438 /// \p SafePtrs is a list of addresses that are known to be legal and we know 439 /// that we can read from them without segfault. 440 /// \p MaskedOp is a list of instructions that have to be transformed into 441 /// calls to the appropriate masked intrinsic when the loop is vectorized. 442 /// \p ConditionalAssumes is a list of assume instructions in predicated 443 /// blocks that must be dropped if the CFG gets flattened. 444 bool blockCanBePredicated( 445 BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, 446 SmallPtrSetImpl<const Instruction *> &MaskedOp, 447 SmallPtrSetImpl<Instruction *> &ConditionalAssumes) const; 448 449 /// Updates the vectorization state by adding \p Phi to the inductions list. 450 /// This can set \p Phi as the main induction of the loop if \p Phi is a 451 /// better choice for the main induction than the existing one. 452 void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID, 453 SmallPtrSetImpl<Value *> &AllowedExit); 454 455 /// If an access has a symbolic strides, this maps the pointer value to 456 /// the stride symbol. getSymbolicStrides()457 const ValueToValueMap *getSymbolicStrides() const { 458 // FIXME: Currently, the set of symbolic strides is sometimes queried before 459 // it's collected. This happens from canVectorizeWithIfConvert, when the 460 // pointer is checked to reference consecutive elements suitable for a 461 // masked access. 462 return LAI ? &LAI->getSymbolicStrides() : nullptr; 463 } 464 465 /// The loop that we evaluate. 466 Loop *TheLoop; 467 468 /// Loop Info analysis. 469 LoopInfo *LI; 470 471 /// A wrapper around ScalarEvolution used to add runtime SCEV checks. 472 /// Applies dynamic knowledge to simplify SCEV expressions in the context 473 /// of existing SCEV assumptions. The analysis will also add a minimal set 474 /// of new predicates if this is required to enable vectorization and 475 /// unrolling. 476 PredicatedScalarEvolution &PSE; 477 478 /// Target Transform Info. 479 TargetTransformInfo *TTI; 480 481 /// Target Library Info. 482 TargetLibraryInfo *TLI; 483 484 /// Dominator Tree. 485 DominatorTree *DT; 486 487 // LoopAccess analysis. 488 LoopAccessInfoManager &LAIs; 489 490 const LoopAccessInfo *LAI = nullptr; 491 492 /// Interface to emit optimization remarks. 493 OptimizationRemarkEmitter *ORE; 494 495 // --- vectorization state --- // 496 497 /// Holds the primary induction variable. This is the counter of the 498 /// loop. 499 PHINode *PrimaryInduction = nullptr; 500 501 /// Holds the reduction variables. 502 ReductionList Reductions; 503 504 /// Holds all of the induction variables that we found in the loop. 505 /// Notice that inductions don't need to start at zero and that induction 506 /// variables can be pointers. 507 InductionList Inductions; 508 509 /// Holds all the casts that participate in the update chain of the induction 510 /// variables, and that have been proven to be redundant (possibly under a 511 /// runtime guard). These casts can be ignored when creating the vectorized 512 /// loop body. 513 SmallPtrSet<Instruction *, 4> InductionCastsToIgnore; 514 515 /// Holds the phi nodes that are fixed-order recurrences. 516 RecurrenceSet FixedOrderRecurrences; 517 518 /// Holds instructions that need to sink past other instructions to handle 519 /// fixed-order recurrences. 520 MapVector<Instruction *, Instruction *> SinkAfter; 521 522 /// Holds the widest induction type encountered. 523 Type *WidestIndTy = nullptr; 524 525 /// Allowed outside users. This holds the variables that can be accessed from 526 /// outside the loop. 527 SmallPtrSet<Value *, 4> AllowedExit; 528 529 /// Vectorization requirements that will go through late-evaluation. 530 LoopVectorizationRequirements *Requirements; 531 532 /// Used to emit an analysis of any legality issues. 533 LoopVectorizeHints *Hints; 534 535 /// The demanded bits analysis is used to compute the minimum type size in 536 /// which a reduction can be computed. 537 DemandedBits *DB; 538 539 /// The assumption cache analysis is used to compute the minimum type size in 540 /// which a reduction can be computed. 541 AssumptionCache *AC; 542 543 /// While vectorizing these instructions we have to generate a 544 /// call to the appropriate masked intrinsic 545 SmallPtrSet<const Instruction *, 8> MaskedOp; 546 547 /// Assume instructions in predicated blocks must be dropped if the CFG gets 548 /// flattened. 549 SmallPtrSet<Instruction *, 8> ConditionalAssumes; 550 551 /// BFI and PSI are used to check for profile guided size optimizations. 552 BlockFrequencyInfo *BFI; 553 ProfileSummaryInfo *PSI; 554 }; 555 556 } // namespace llvm 557 558 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H 559