1 //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file defines the LoopVectorizationLegality class. Original code 11 /// in Loop Vectorizer has been moved out to its own file for modularity 12 /// and reusability. 13 /// 14 /// Currently, it works for innermost loop vectorization. Extending this to 15 /// outer loop vectorization is a TODO item. 16 /// 17 /// Also provides: 18 /// 1) LoopVectorizeHints class which keeps a number of loop annotations 19 /// locally for easy look up. It has the ability to write them back as 20 /// loop metadata, upon request. 21 /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose 22 /// of reporting useful failure to vectorize message. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H 27 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H 28 29 #include "llvm/ADT/MapVector.h" 30 #include "llvm/Analysis/LoopAccessAnalysis.h" 31 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 32 #include "llvm/Transforms/Utils/LoopUtils.h" 33 34 namespace llvm { 35 36 /// Utility class for getting and setting loop vectorizer hints in the form 37 /// of loop metadata. 38 /// This class keeps a number of loop annotations locally (as member variables) 39 /// and can, upon request, write them back as metadata on the loop. It will 40 /// initially scan the loop for existing metadata, and will update the local 41 /// values based on information in the loop. 42 /// We cannot write all values to metadata, as the mere presence of some info, 43 /// for example 'force', means a decision has been made. So, we need to be 44 /// careful NOT to add them if the user hasn't specifically asked so. 45 class LoopVectorizeHints { 46 enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED, 47 HK_PREDICATE }; 48 49 /// Hint - associates name and validation with the hint value. 50 struct Hint { 51 const char *Name; 52 unsigned Value; // This may have to change for non-numeric values. 53 HintKind Kind; 54 55 Hint(const char *Name, unsigned Value, HintKind Kind) 56 : Name(Name), Value(Value), Kind(Kind) {} 57 58 bool validate(unsigned Val); 59 }; 60 61 /// Vectorization width. 62 Hint Width; 63 64 /// Vectorization interleave factor. 65 Hint Interleave; 66 67 /// Vectorization forced 68 Hint Force; 69 70 /// Already Vectorized 71 Hint IsVectorized; 72 73 /// Vector Predicate 74 Hint Predicate; 75 76 /// Return the loop metadata prefix. 77 static StringRef Prefix() { return "llvm.loop."; } 78 79 /// True if there is any unsafe math in the loop. 80 bool PotentiallyUnsafe = false; 81 82 public: 83 enum ForceKind { 84 FK_Undefined = -1, ///< Not selected. 85 FK_Disabled = 0, ///< Forcing disabled. 86 FK_Enabled = 1, ///< Forcing enabled. 87 }; 88 89 LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, 90 OptimizationRemarkEmitter &ORE); 91 92 /// Mark the loop L as already vectorized by setting the width to 1. 93 void setAlreadyVectorized(); 94 95 bool allowVectorization(Function *F, Loop *L, 96 bool VectorizeOnlyWhenForced) const; 97 98 /// Dumps all the hint information. 99 void emitRemarkWithHints() const; 100 101 unsigned getWidth() const { return Width.Value; } 102 unsigned getInterleave() const { return Interleave.Value; } 103 unsigned getIsVectorized() const { return IsVectorized.Value; } 104 unsigned getPredicate() const { return Predicate.Value; } 105 enum ForceKind getForce() const { 106 if ((ForceKind)Force.Value == FK_Undefined && 107 hasDisableAllTransformsHint(TheLoop)) 108 return FK_Disabled; 109 return (ForceKind)Force.Value; 110 } 111 112 /// If hints are provided that force vectorization, use the AlwaysPrint 113 /// pass name to force the frontend to print the diagnostic. 114 const char *vectorizeAnalysisPassName() const; 115 116 bool allowReordering() const { 117 // When enabling loop hints are provided we allow the vectorizer to change 118 // the order of operations that is given by the scalar loop. This is not 119 // enabled by default because can be unsafe or inefficient. For example, 120 // reordering floating-point operations will change the way round-off 121 // error accumulates in the loop. 122 return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1; 123 } 124 125 bool isPotentiallyUnsafe() const { 126 // Avoid FP vectorization if the target is unsure about proper support. 127 // This may be related to the SIMD unit in the target not handling 128 // IEEE 754 FP ops properly, or bad single-to-double promotions. 129 // Otherwise, a sequence of vectorized loops, even without reduction, 130 // could lead to different end results on the destination vectors. 131 return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe; 132 } 133 134 void setPotentiallyUnsafe() { PotentiallyUnsafe = true; } 135 136 private: 137 /// Find hints specified in the loop metadata and update local values. 138 void getHintsFromMetadata(); 139 140 /// Checks string hint with one operand and set value if valid. 141 void setHint(StringRef Name, Metadata *Arg); 142 143 /// The loop these hints belong to. 144 const Loop *TheLoop; 145 146 /// Interface to emit optimization remarks. 147 OptimizationRemarkEmitter &ORE; 148 }; 149 150 /// This holds vectorization requirements that must be verified late in 151 /// the process. The requirements are set by legalize and costmodel. Once 152 /// vectorization has been determined to be possible and profitable the 153 /// requirements can be verified by looking for metadata or compiler options. 154 /// For example, some loops require FP commutativity which is only allowed if 155 /// vectorization is explicitly specified or if the fast-math compiler option 156 /// has been provided. 157 /// Late evaluation of these requirements allows helpful diagnostics to be 158 /// composed that tells the user what need to be done to vectorize the loop. For 159 /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late 160 /// evaluation should be used only when diagnostics can generated that can be 161 /// followed by a non-expert user. 162 class LoopVectorizationRequirements { 163 public: 164 LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {} 165 166 void addUnsafeAlgebraInst(Instruction *I) { 167 // First unsafe algebra instruction. 168 if (!UnsafeAlgebraInst) 169 UnsafeAlgebraInst = I; 170 } 171 172 void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; } 173 174 bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints); 175 176 private: 177 unsigned NumRuntimePointerChecks = 0; 178 Instruction *UnsafeAlgebraInst = nullptr; 179 180 /// Interface to emit optimization remarks. 181 OptimizationRemarkEmitter &ORE; 182 }; 183 184 /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and 185 /// to what vectorization factor. 186 /// This class does not look at the profitability of vectorization, only the 187 /// legality. This class has two main kinds of checks: 188 /// * Memory checks - The code in canVectorizeMemory checks if vectorization 189 /// will change the order of memory accesses in a way that will change the 190 /// correctness of the program. 191 /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory 192 /// checks for a number of different conditions, such as the availability of a 193 /// single induction variable, that all types are supported and vectorize-able, 194 /// etc. This code reflects the capabilities of InnerLoopVectorizer. 195 /// This class is also used by InnerLoopVectorizer for identifying 196 /// induction variable and the different reduction variables. 197 class LoopVectorizationLegality { 198 public: 199 LoopVectorizationLegality( 200 Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, 201 TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AAResults *AA, 202 Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA, 203 LoopInfo *LI, OptimizationRemarkEmitter *ORE, 204 LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB, 205 AssumptionCache *AC) 206 : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT), 207 GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {} 208 209 /// ReductionList contains the reduction descriptors for all 210 /// of the reductions that were found in the loop. 211 using ReductionList = MapVector<PHINode *, RecurrenceDescriptor>; 212 213 /// InductionList saves induction variables and maps them to the 214 /// induction descriptor. 215 using InductionList = MapVector<PHINode *, InductionDescriptor>; 216 217 /// RecurrenceSet contains the phi nodes that are recurrences other than 218 /// inductions and reductions. 219 using RecurrenceSet = SmallPtrSet<const PHINode *, 8>; 220 221 /// Returns true if it is legal to vectorize this loop. 222 /// This does not mean that it is profitable to vectorize this 223 /// loop, only that it is legal to do so. 224 /// Temporarily taking UseVPlanNativePath parameter. If true, take 225 /// the new code path being implemented for outer loop vectorization 226 /// (should be functional for inner loop vectorization) based on VPlan. 227 /// If false, good old LV code. 228 bool canVectorize(bool UseVPlanNativePath); 229 230 /// Return true if we can vectorize this loop while folding its tail by 231 /// masking, and mark all respective loads/stores for masking. 232 bool prepareToFoldTailByMasking(); 233 234 /// Returns the primary induction variable. 235 PHINode *getPrimaryInduction() { return PrimaryInduction; } 236 237 /// Returns the reduction variables found in the loop. 238 ReductionList &getReductionVars() { return Reductions; } 239 240 /// Returns the induction variables found in the loop. 241 InductionList &getInductionVars() { return Inductions; } 242 243 /// Return the first-order recurrences found in the loop. 244 RecurrenceSet &getFirstOrderRecurrences() { return FirstOrderRecurrences; } 245 246 /// Return the set of instructions to sink to handle first-order recurrences. 247 DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; } 248 249 /// Returns the widest induction type. 250 Type *getWidestInductionType() { return WidestIndTy; } 251 252 /// Returns True if V is a Phi node of an induction variable in this loop. 253 bool isInductionPhi(const Value *V); 254 255 /// Returns True if V is a cast that is part of an induction def-use chain, 256 /// and had been proven to be redundant under a runtime guard (in other 257 /// words, the cast has the same SCEV expression as the induction phi). 258 bool isCastedInductionVariable(const Value *V); 259 260 /// Returns True if V can be considered as an induction variable in this 261 /// loop. V can be the induction phi, or some redundant cast in the def-use 262 /// chain of the inducion phi. 263 bool isInductionVariable(const Value *V); 264 265 /// Returns True if PN is a reduction variable in this loop. 266 bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); } 267 268 /// Returns True if Phi is a first-order recurrence in this loop. 269 bool isFirstOrderRecurrence(const PHINode *Phi); 270 271 /// Return true if the block BB needs to be predicated in order for the loop 272 /// to be vectorized. 273 bool blockNeedsPredication(BasicBlock *BB); 274 275 /// Check if this pointer is consecutive when vectorizing. This happens 276 /// when the last index of the GEP is the induction variable, or that the 277 /// pointer itself is an induction variable. 278 /// This check allows us to vectorize A[idx] into a wide load/store. 279 /// Returns: 280 /// 0 - Stride is unknown or non-consecutive. 281 /// 1 - Address is consecutive. 282 /// -1 - Address is consecutive, and decreasing. 283 /// NOTE: This method must only be used before modifying the original scalar 284 /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965). 285 int isConsecutivePtr(Value *Ptr); 286 287 /// Returns true if the value V is uniform within the loop. 288 bool isUniform(Value *V); 289 290 /// Returns the information that we collected about runtime memory check. 291 const RuntimePointerChecking *getRuntimePointerChecking() const { 292 return LAI->getRuntimePointerChecking(); 293 } 294 295 const LoopAccessInfo *getLAI() const { return LAI; } 296 297 unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } 298 299 uint64_t getMaxSafeRegisterWidth() const { 300 return LAI->getDepChecker().getMaxSafeRegisterWidth(); 301 } 302 303 bool hasStride(Value *V) { return LAI->hasStride(V); } 304 305 /// Returns true if vector representation of the instruction \p I 306 /// requires mask. 307 bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); } 308 309 unsigned getNumStores() const { return LAI->getNumStores(); } 310 unsigned getNumLoads() const { return LAI->getNumLoads(); } 311 312 // Returns true if the NoNaN attribute is set on the function. 313 bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; } 314 315 /// Returns all assume calls in predicated blocks. They need to be dropped 316 /// when flattening the CFG. 317 const SmallPtrSetImpl<Instruction *> &getConditionalAssumes() const { 318 return ConditionalAssumes; 319 } 320 321 private: 322 /// Return true if the pre-header, exiting and latch blocks of \p Lp and all 323 /// its nested loops are considered legal for vectorization. These legal 324 /// checks are common for inner and outer loop vectorization. 325 /// Temporarily taking UseVPlanNativePath parameter. If true, take 326 /// the new code path being implemented for outer loop vectorization 327 /// (should be functional for inner loop vectorization) based on VPlan. 328 /// If false, good old LV code. 329 bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath); 330 331 /// Set up outer loop inductions by checking Phis in outer loop header for 332 /// supported inductions (int inductions). Return false if any of these Phis 333 /// is not a supported induction or if we fail to find an induction. 334 bool setupOuterLoopInductions(); 335 336 /// Return true if the pre-header, exiting and latch blocks of \p Lp 337 /// (non-recursive) are considered legal for vectorization. 338 /// Temporarily taking UseVPlanNativePath parameter. If true, take 339 /// the new code path being implemented for outer loop vectorization 340 /// (should be functional for inner loop vectorization) based on VPlan. 341 /// If false, good old LV code. 342 bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath); 343 344 /// Check if a single basic block loop is vectorizable. 345 /// At this point we know that this is a loop with a constant trip count 346 /// and we only need to check individual instructions. 347 bool canVectorizeInstrs(); 348 349 /// When we vectorize loops we may change the order in which 350 /// we read and write from memory. This method checks if it is 351 /// legal to vectorize the code, considering only memory constrains. 352 /// Returns true if the loop is vectorizable 353 bool canVectorizeMemory(); 354 355 /// Return true if we can vectorize this loop using the IF-conversion 356 /// transformation. 357 bool canVectorizeWithIfConvert(); 358 359 /// Return true if we can vectorize this outer loop. The method performs 360 /// specific checks for outer loop vectorization. 361 bool canVectorizeOuterLoop(); 362 363 /// Return true if all of the instructions in the block can be speculatively 364 /// executed, and record the loads/stores that require masking. If's that 365 /// guard loads can be ignored under "assume safety" unless \p PreserveGuards 366 /// is true. This can happen when we introduces guards for which the original 367 /// "unguarded-loads are safe" assumption does not hold. For example, the 368 /// vectorizer's fold-tail transformation changes the loop to execute beyond 369 /// its original trip-count, under a proper guard, which should be preserved. 370 /// \p SafePtrs is a list of addresses that are known to be legal and we know 371 /// that we can read from them without segfault. 372 bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, 373 bool PreserveGuards = false); 374 375 /// Updates the vectorization state by adding \p Phi to the inductions list. 376 /// This can set \p Phi as the main induction of the loop if \p Phi is a 377 /// better choice for the main induction than the existing one. 378 void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID, 379 SmallPtrSetImpl<Value *> &AllowedExit); 380 381 /// If an access has a symbolic strides, this maps the pointer value to 382 /// the stride symbol. 383 const ValueToValueMap *getSymbolicStrides() { 384 // FIXME: Currently, the set of symbolic strides is sometimes queried before 385 // it's collected. This happens from canVectorizeWithIfConvert, when the 386 // pointer is checked to reference consecutive elements suitable for a 387 // masked access. 388 return LAI ? &LAI->getSymbolicStrides() : nullptr; 389 } 390 391 /// The loop that we evaluate. 392 Loop *TheLoop; 393 394 /// Loop Info analysis. 395 LoopInfo *LI; 396 397 /// A wrapper around ScalarEvolution used to add runtime SCEV checks. 398 /// Applies dynamic knowledge to simplify SCEV expressions in the context 399 /// of existing SCEV assumptions. The analysis will also add a minimal set 400 /// of new predicates if this is required to enable vectorization and 401 /// unrolling. 402 PredicatedScalarEvolution &PSE; 403 404 /// Target Transform Info. 405 TargetTransformInfo *TTI; 406 407 /// Target Library Info. 408 TargetLibraryInfo *TLI; 409 410 /// Dominator Tree. 411 DominatorTree *DT; 412 413 // LoopAccess analysis. 414 std::function<const LoopAccessInfo &(Loop &)> *GetLAA; 415 416 // And the loop-accesses info corresponding to this loop. This pointer is 417 // null until canVectorizeMemory sets it up. 418 const LoopAccessInfo *LAI = nullptr; 419 420 /// Interface to emit optimization remarks. 421 OptimizationRemarkEmitter *ORE; 422 423 // --- vectorization state --- // 424 425 /// Holds the primary induction variable. This is the counter of the 426 /// loop. 427 PHINode *PrimaryInduction = nullptr; 428 429 /// Holds the reduction variables. 430 ReductionList Reductions; 431 432 /// Holds all of the induction variables that we found in the loop. 433 /// Notice that inductions don't need to start at zero and that induction 434 /// variables can be pointers. 435 InductionList Inductions; 436 437 /// Holds all the casts that participate in the update chain of the induction 438 /// variables, and that have been proven to be redundant (possibly under a 439 /// runtime guard). These casts can be ignored when creating the vectorized 440 /// loop body. 441 SmallPtrSet<Instruction *, 4> InductionCastsToIgnore; 442 443 /// Holds the phi nodes that are first-order recurrences. 444 RecurrenceSet FirstOrderRecurrences; 445 446 /// Holds instructions that need to sink past other instructions to handle 447 /// first-order recurrences. 448 DenseMap<Instruction *, Instruction *> SinkAfter; 449 450 /// Holds the widest induction type encountered. 451 Type *WidestIndTy = nullptr; 452 453 /// Allowed outside users. This holds the variables that can be accessed from 454 /// outside the loop. 455 SmallPtrSet<Value *, 4> AllowedExit; 456 457 /// Can we assume the absence of NaNs. 458 bool HasFunNoNaNAttr = false; 459 460 /// Vectorization requirements that will go through late-evaluation. 461 LoopVectorizationRequirements *Requirements; 462 463 /// Used to emit an analysis of any legality issues. 464 LoopVectorizeHints *Hints; 465 466 /// The demanded bits analysis is used to compute the minimum type size in 467 /// which a reduction can be computed. 468 DemandedBits *DB; 469 470 /// The assumption cache analysis is used to compute the minimum type size in 471 /// which a reduction can be computed. 472 AssumptionCache *AC; 473 474 /// While vectorizing these instructions we have to generate a 475 /// call to the appropriate masked intrinsic 476 SmallPtrSet<const Instruction *, 8> MaskedOp; 477 478 /// Assume instructions in predicated blocks must be dropped if the CFG gets 479 /// flattened. 480 SmallPtrSet<Instruction *, 8> ConditionalAssumes; 481 }; 482 483 } // namespace llvm 484 485 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H 486