1 //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file defines the LoopVectorizationLegality class. Original code 11 /// in Loop Vectorizer has been moved out to its own file for modularity 12 /// and reusability. 13 /// 14 /// Currently, it works for innermost loop vectorization. Extending this to 15 /// outer loop vectorization is a TODO item. 16 /// 17 /// Also provides: 18 /// 1) LoopVectorizeHints class which keeps a number of loop annotations 19 /// locally for easy look up. It has the ability to write them back as 20 /// loop metadata, upon request. 21 /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose 22 /// of reporting useful failure to vectorize message. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H 27 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H 28 29 #include "llvm/ADT/MapVector.h" 30 #include "llvm/Analysis/LoopAccessAnalysis.h" 31 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 32 #include "llvm/Transforms/Utils/LoopUtils.h" 33 34 namespace llvm { 35 36 /// Create an analysis remark that explains why vectorization failed 37 /// 38 /// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p 39 /// RemarkName is the identifier for the remark. If \p I is passed it is an 40 /// instruction that prevents vectorization. Otherwise \p TheLoop is used for 41 /// the location of the remark. \return the remark object that can be 42 /// streamed to. 43 OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName, 44 StringRef RemarkName, 45 Loop *TheLoop, 46 Instruction *I = nullptr); 47 48 /// Utility class for getting and setting loop vectorizer hints in the form 49 /// of loop metadata. 50 /// This class keeps a number of loop annotations locally (as member variables) 51 /// and can, upon request, write them back as metadata on the loop. It will 52 /// initially scan the loop for existing metadata, and will update the local 53 /// values based on information in the loop. 54 /// We cannot write all values to metadata, as the mere presence of some info, 55 /// for example 'force', means a decision has been made. So, we need to be 56 /// careful NOT to add them if the user hasn't specifically asked so. 57 class LoopVectorizeHints { 58 enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED }; 59 60 /// Hint - associates name and validation with the hint value. 61 struct Hint { 62 const char *Name; 63 unsigned Value; // This may have to change for non-numeric values. 64 HintKind Kind; 65 HintHint66 Hint(const char *Name, unsigned Value, HintKind Kind) 67 : Name(Name), Value(Value), Kind(Kind) {} 68 69 bool validate(unsigned Val); 70 }; 71 72 /// Vectorization width. 73 Hint Width; 74 75 /// Vectorization interleave factor. 76 Hint Interleave; 77 78 /// Vectorization forced 79 Hint Force; 80 81 /// Already Vectorized 82 Hint IsVectorized; 83 84 /// Return the loop metadata prefix. Prefix()85 static StringRef Prefix() { return "llvm.loop."; } 86 87 /// True if there is any unsafe math in the loop. 88 bool PotentiallyUnsafe = false; 89 90 public: 91 enum ForceKind { 92 FK_Undefined = -1, ///< Not selected. 93 FK_Disabled = 0, ///< Forcing disabled. 94 FK_Enabled = 1, ///< Forcing enabled. 95 }; 96 97 LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, 98 OptimizationRemarkEmitter &ORE); 99 100 /// Mark the loop L as already vectorized by setting the width to 1. 101 void setAlreadyVectorized(); 102 103 bool allowVectorization(Function *F, Loop *L, 104 bool VectorizeOnlyWhenForced) const; 105 106 /// Dumps all the hint information. 107 void emitRemarkWithHints() const; 108 getWidth()109 unsigned getWidth() const { return Width.Value; } getInterleave()110 unsigned getInterleave() const { return Interleave.Value; } getIsVectorized()111 unsigned getIsVectorized() const { return IsVectorized.Value; } getForce()112 enum ForceKind getForce() const { 113 if ((ForceKind)Force.Value == FK_Undefined && 114 hasDisableAllTransformsHint(TheLoop)) 115 return FK_Disabled; 116 return (ForceKind)Force.Value; 117 } 118 119 /// If hints are provided that force vectorization, use the AlwaysPrint 120 /// pass name to force the frontend to print the diagnostic. 121 const char *vectorizeAnalysisPassName() const; 122 allowReordering()123 bool allowReordering() const { 124 // When enabling loop hints are provided we allow the vectorizer to change 125 // the order of operations that is given by the scalar loop. This is not 126 // enabled by default because can be unsafe or inefficient. For example, 127 // reordering floating-point operations will change the way round-off 128 // error accumulates in the loop. 129 return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1; 130 } 131 isPotentiallyUnsafe()132 bool isPotentiallyUnsafe() const { 133 // Avoid FP vectorization if the target is unsure about proper support. 134 // This may be related to the SIMD unit in the target not handling 135 // IEEE 754 FP ops properly, or bad single-to-double promotions. 136 // Otherwise, a sequence of vectorized loops, even without reduction, 137 // could lead to different end results on the destination vectors. 138 return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe; 139 } 140 setPotentiallyUnsafe()141 void setPotentiallyUnsafe() { PotentiallyUnsafe = true; } 142 143 private: 144 /// Find hints specified in the loop metadata and update local values. 145 void getHintsFromMetadata(); 146 147 /// Checks string hint with one operand and set value if valid. 148 void setHint(StringRef Name, Metadata *Arg); 149 150 /// The loop these hints belong to. 151 const Loop *TheLoop; 152 153 /// Interface to emit optimization remarks. 154 OptimizationRemarkEmitter &ORE; 155 }; 156 157 /// This holds vectorization requirements that must be verified late in 158 /// the process. The requirements are set by legalize and costmodel. Once 159 /// vectorization has been determined to be possible and profitable the 160 /// requirements can be verified by looking for metadata or compiler options. 161 /// For example, some loops require FP commutativity which is only allowed if 162 /// vectorization is explicitly specified or if the fast-math compiler option 163 /// has been provided. 164 /// Late evaluation of these requirements allows helpful diagnostics to be 165 /// composed that tells the user what need to be done to vectorize the loop. For 166 /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late 167 /// evaluation should be used only when diagnostics can generated that can be 168 /// followed by a non-expert user. 169 class LoopVectorizationRequirements { 170 public: LoopVectorizationRequirements(OptimizationRemarkEmitter & ORE)171 LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {} 172 addUnsafeAlgebraInst(Instruction * I)173 void addUnsafeAlgebraInst(Instruction *I) { 174 // First unsafe algebra instruction. 175 if (!UnsafeAlgebraInst) 176 UnsafeAlgebraInst = I; 177 } 178 addRuntimePointerChecks(unsigned Num)179 void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; } 180 181 bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints); 182 183 private: 184 unsigned NumRuntimePointerChecks = 0; 185 Instruction *UnsafeAlgebraInst = nullptr; 186 187 /// Interface to emit optimization remarks. 188 OptimizationRemarkEmitter &ORE; 189 }; 190 191 /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and 192 /// to what vectorization factor. 193 /// This class does not look at the profitability of vectorization, only the 194 /// legality. This class has two main kinds of checks: 195 /// * Memory checks - The code in canVectorizeMemory checks if vectorization 196 /// will change the order of memory accesses in a way that will change the 197 /// correctness of the program. 198 /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory 199 /// checks for a number of different conditions, such as the availability of a 200 /// single induction variable, that all types are supported and vectorize-able, 201 /// etc. This code reflects the capabilities of InnerLoopVectorizer. 202 /// This class is also used by InnerLoopVectorizer for identifying 203 /// induction variable and the different reduction variables. 204 class LoopVectorizationLegality { 205 public: LoopVectorizationLegality(Loop * L,PredicatedScalarEvolution & PSE,DominatorTree * DT,TargetTransformInfo * TTI,TargetLibraryInfo * TLI,AliasAnalysis * AA,Function * F,std::function<const LoopAccessInfo & (Loop &)> * GetLAA,LoopInfo * LI,OptimizationRemarkEmitter * ORE,LoopVectorizationRequirements * R,LoopVectorizeHints * H,DemandedBits * DB,AssumptionCache * AC)206 LoopVectorizationLegality( 207 Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, 208 TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AliasAnalysis *AA, 209 Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA, 210 LoopInfo *LI, OptimizationRemarkEmitter *ORE, 211 LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB, 212 AssumptionCache *AC) 213 : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT), 214 GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {} 215 216 /// ReductionList contains the reduction descriptors for all 217 /// of the reductions that were found in the loop. 218 using ReductionList = DenseMap<PHINode *, RecurrenceDescriptor>; 219 220 /// InductionList saves induction variables and maps them to the 221 /// induction descriptor. 222 using InductionList = MapVector<PHINode *, InductionDescriptor>; 223 224 /// RecurrenceSet contains the phi nodes that are recurrences other than 225 /// inductions and reductions. 226 using RecurrenceSet = SmallPtrSet<const PHINode *, 8>; 227 228 /// Returns true if it is legal to vectorize this loop. 229 /// This does not mean that it is profitable to vectorize this 230 /// loop, only that it is legal to do so. 231 /// Temporarily taking UseVPlanNativePath parameter. If true, take 232 /// the new code path being implemented for outer loop vectorization 233 /// (should be functional for inner loop vectorization) based on VPlan. 234 /// If false, good old LV code. 235 bool canVectorize(bool UseVPlanNativePath); 236 237 /// Return true if we can vectorize this loop while folding its tail by 238 /// masking. 239 bool canFoldTailByMasking(); 240 241 /// Returns the primary induction variable. getPrimaryInduction()242 PHINode *getPrimaryInduction() { return PrimaryInduction; } 243 244 /// Returns the reduction variables found in the loop. getReductionVars()245 ReductionList *getReductionVars() { return &Reductions; } 246 247 /// Returns the induction variables found in the loop. getInductionVars()248 InductionList *getInductionVars() { return &Inductions; } 249 250 /// Return the first-order recurrences found in the loop. getFirstOrderRecurrences()251 RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; } 252 253 /// Return the set of instructions to sink to handle first-order recurrences. getSinkAfter()254 DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; } 255 256 /// Returns the widest induction type. getWidestInductionType()257 Type *getWidestInductionType() { return WidestIndTy; } 258 259 /// Returns True if V is a Phi node of an induction variable in this loop. 260 bool isInductionPhi(const Value *V); 261 262 /// Returns True if V is a cast that is part of an induction def-use chain, 263 /// and had been proven to be redundant under a runtime guard (in other 264 /// words, the cast has the same SCEV expression as the induction phi). 265 bool isCastedInductionVariable(const Value *V); 266 267 /// Returns True if V can be considered as an induction variable in this 268 /// loop. V can be the induction phi, or some redundant cast in the def-use 269 /// chain of the inducion phi. 270 bool isInductionVariable(const Value *V); 271 272 /// Returns True if PN is a reduction variable in this loop. isReductionVariable(PHINode * PN)273 bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); } 274 275 /// Returns True if Phi is a first-order recurrence in this loop. 276 bool isFirstOrderRecurrence(const PHINode *Phi); 277 278 /// Return true if the block BB needs to be predicated in order for the loop 279 /// to be vectorized. 280 bool blockNeedsPredication(BasicBlock *BB); 281 282 /// Check if this pointer is consecutive when vectorizing. This happens 283 /// when the last index of the GEP is the induction variable, or that the 284 /// pointer itself is an induction variable. 285 /// This check allows us to vectorize A[idx] into a wide load/store. 286 /// Returns: 287 /// 0 - Stride is unknown or non-consecutive. 288 /// 1 - Address is consecutive. 289 /// -1 - Address is consecutive, and decreasing. 290 /// NOTE: This method must only be used before modifying the original scalar 291 /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965). 292 int isConsecutivePtr(Value *Ptr); 293 294 /// Returns true if the value V is uniform within the loop. 295 bool isUniform(Value *V); 296 297 /// Returns the information that we collected about runtime memory check. getRuntimePointerChecking()298 const RuntimePointerChecking *getRuntimePointerChecking() const { 299 return LAI->getRuntimePointerChecking(); 300 } 301 getLAI()302 const LoopAccessInfo *getLAI() const { return LAI; } 303 getMaxSafeDepDistBytes()304 unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } 305 getMaxSafeRegisterWidth()306 uint64_t getMaxSafeRegisterWidth() const { 307 return LAI->getDepChecker().getMaxSafeRegisterWidth(); 308 } 309 hasStride(Value * V)310 bool hasStride(Value *V) { return LAI->hasStride(V); } 311 312 /// Returns true if vector representation of the instruction \p I 313 /// requires mask. isMaskRequired(const Instruction * I)314 bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); } 315 getNumStores()316 unsigned getNumStores() const { return LAI->getNumStores(); } getNumLoads()317 unsigned getNumLoads() const { return LAI->getNumLoads(); } 318 319 // Returns true if the NoNaN attribute is set on the function. hasFunNoNaNAttr()320 bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; } 321 322 private: 323 /// Return true if the pre-header, exiting and latch blocks of \p Lp and all 324 /// its nested loops are considered legal for vectorization. These legal 325 /// checks are common for inner and outer loop vectorization. 326 /// Temporarily taking UseVPlanNativePath parameter. If true, take 327 /// the new code path being implemented for outer loop vectorization 328 /// (should be functional for inner loop vectorization) based on VPlan. 329 /// If false, good old LV code. 330 bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath); 331 332 /// Set up outer loop inductions by checking Phis in outer loop header for 333 /// supported inductions (int inductions). Return false if any of these Phis 334 /// is not a supported induction or if we fail to find an induction. 335 bool setupOuterLoopInductions(); 336 337 /// Return true if the pre-header, exiting and latch blocks of \p Lp 338 /// (non-recursive) are considered legal for vectorization. 339 /// Temporarily taking UseVPlanNativePath parameter. If true, take 340 /// the new code path being implemented for outer loop vectorization 341 /// (should be functional for inner loop vectorization) based on VPlan. 342 /// If false, good old LV code. 343 bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath); 344 345 /// Check if a single basic block loop is vectorizable. 346 /// At this point we know that this is a loop with a constant trip count 347 /// and we only need to check individual instructions. 348 bool canVectorizeInstrs(); 349 350 /// When we vectorize loops we may change the order in which 351 /// we read and write from memory. This method checks if it is 352 /// legal to vectorize the code, considering only memory constrains. 353 /// Returns true if the loop is vectorizable 354 bool canVectorizeMemory(); 355 356 /// Return true if we can vectorize this loop using the IF-conversion 357 /// transformation. 358 bool canVectorizeWithIfConvert(); 359 360 /// Return true if we can vectorize this outer loop. The method performs 361 /// specific checks for outer loop vectorization. 362 bool canVectorizeOuterLoop(); 363 364 /// Return true if all of the instructions in the block can be speculatively 365 /// executed. \p SafePtrs is a list of addresses that are known to be legal 366 /// and we know that we can read from them without segfault. 367 bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs); 368 369 /// Updates the vectorization state by adding \p Phi to the inductions list. 370 /// This can set \p Phi as the main induction of the loop if \p Phi is a 371 /// better choice for the main induction than the existing one. 372 void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID, 373 SmallPtrSetImpl<Value *> &AllowedExit); 374 375 /// If an access has a symbolic strides, this maps the pointer value to 376 /// the stride symbol. getSymbolicStrides()377 const ValueToValueMap *getSymbolicStrides() { 378 // FIXME: Currently, the set of symbolic strides is sometimes queried before 379 // it's collected. This happens from canVectorizeWithIfConvert, when the 380 // pointer is checked to reference consecutive elements suitable for a 381 // masked access. 382 return LAI ? &LAI->getSymbolicStrides() : nullptr; 383 } 384 385 /// Reports a vectorization illegality: print \p DebugMsg for debugging 386 /// purposes along with the corresponding optimization remark \p RemarkName. 387 /// If \p I is passed it is an instruction that prevents vectorization. 388 /// Otherwise the loop is used for the location of the remark. 389 void reportVectorizationFailure(const StringRef DebugMsg, 390 const StringRef OREMsg, const StringRef ORETag, 391 Instruction *I = nullptr) const; 392 393 /// The loop that we evaluate. 394 Loop *TheLoop; 395 396 /// Loop Info analysis. 397 LoopInfo *LI; 398 399 /// A wrapper around ScalarEvolution used to add runtime SCEV checks. 400 /// Applies dynamic knowledge to simplify SCEV expressions in the context 401 /// of existing SCEV assumptions. The analysis will also add a minimal set 402 /// of new predicates if this is required to enable vectorization and 403 /// unrolling. 404 PredicatedScalarEvolution &PSE; 405 406 /// Target Transform Info. 407 TargetTransformInfo *TTI; 408 409 /// Target Library Info. 410 TargetLibraryInfo *TLI; 411 412 /// Dominator Tree. 413 DominatorTree *DT; 414 415 // LoopAccess analysis. 416 std::function<const LoopAccessInfo &(Loop &)> *GetLAA; 417 418 // And the loop-accesses info corresponding to this loop. This pointer is 419 // null until canVectorizeMemory sets it up. 420 const LoopAccessInfo *LAI = nullptr; 421 422 /// Interface to emit optimization remarks. 423 OptimizationRemarkEmitter *ORE; 424 425 // --- vectorization state --- // 426 427 /// Holds the primary induction variable. This is the counter of the 428 /// loop. 429 PHINode *PrimaryInduction = nullptr; 430 431 /// Holds the reduction variables. 432 ReductionList Reductions; 433 434 /// Holds all of the induction variables that we found in the loop. 435 /// Notice that inductions don't need to start at zero and that induction 436 /// variables can be pointers. 437 InductionList Inductions; 438 439 /// Holds all the casts that participate in the update chain of the induction 440 /// variables, and that have been proven to be redundant (possibly under a 441 /// runtime guard). These casts can be ignored when creating the vectorized 442 /// loop body. 443 SmallPtrSet<Instruction *, 4> InductionCastsToIgnore; 444 445 /// Holds the phi nodes that are first-order recurrences. 446 RecurrenceSet FirstOrderRecurrences; 447 448 /// Holds instructions that need to sink past other instructions to handle 449 /// first-order recurrences. 450 DenseMap<Instruction *, Instruction *> SinkAfter; 451 452 /// Holds the widest induction type encountered. 453 Type *WidestIndTy = nullptr; 454 455 /// Allowed outside users. This holds the variables that can be accessed from 456 /// outside the loop. 457 SmallPtrSet<Value *, 4> AllowedExit; 458 459 /// Can we assume the absence of NaNs. 460 bool HasFunNoNaNAttr = false; 461 462 /// Vectorization requirements that will go through late-evaluation. 463 LoopVectorizationRequirements *Requirements; 464 465 /// Used to emit an analysis of any legality issues. 466 LoopVectorizeHints *Hints; 467 468 /// The demanded bits analysis is used to compute the minimum type size in 469 /// which a reduction can be computed. 470 DemandedBits *DB; 471 472 /// The assumption cache analysis is used to compute the minimum type size in 473 /// which a reduction can be computed. 474 AssumptionCache *AC; 475 476 /// While vectorizing these instructions we have to generate a 477 /// call to the appropriate masked intrinsic 478 SmallPtrSet<const Instruction *, 8> MaskedOp; 479 }; 480 481 } // namespace llvm 482 483 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H 484