1 //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file defines the LoopVectorizationLegality class. Original code
11 /// in Loop Vectorizer has been moved out to its own file for modularity
12 /// and reusability.
13 ///
14 /// Currently, it works for innermost loop vectorization. Extending this to
15 /// outer loop vectorization is a TODO item.
16 ///
17 /// Also provides:
18 /// 1) LoopVectorizeHints class which keeps a number of loop annotations
19 /// locally for easy look up. It has the ability to write them back as
20 /// loop metadata, upon request.
21 /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
22 /// of reporting useful failure to vectorize message.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
27 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
28 
29 #include "llvm/ADT/MapVector.h"
30 #include "llvm/Analysis/LoopAccessAnalysis.h"
31 #include "llvm/Support/TypeSize.h"
32 #include "llvm/Transforms/Utils/LoopUtils.h"
33 
34 namespace llvm {
35 class AAResults;
36 class AssumptionCache;
37 class BasicBlock;
38 class BlockFrequencyInfo;
39 class DemandedBits;
40 class DominatorTree;
41 class Function;
42 class Loop;
43 class LoopInfo;
44 class Metadata;
45 class OptimizationRemarkEmitter;
46 class PredicatedScalarEvolution;
47 class ProfileSummaryInfo;
48 class TargetLibraryInfo;
49 class TargetTransformInfo;
50 class Type;
51 
52 /// Utility class for getting and setting loop vectorizer hints in the form
53 /// of loop metadata.
54 /// This class keeps a number of loop annotations locally (as member variables)
55 /// and can, upon request, write them back as metadata on the loop. It will
56 /// initially scan the loop for existing metadata, and will update the local
57 /// values based on information in the loop.
58 /// We cannot write all values to metadata, as the mere presence of some info,
59 /// for example 'force', means a decision has been made. So, we need to be
60 /// careful NOT to add them if the user hasn't specifically asked so.
61 class LoopVectorizeHints {
62   enum HintKind {
63     HK_WIDTH,
64     HK_INTERLEAVE,
65     HK_FORCE,
66     HK_ISVECTORIZED,
67     HK_PREDICATE,
68     HK_SCALABLE
69   };
70 
71   /// Hint - associates name and validation with the hint value.
72   struct Hint {
73     const char *Name;
74     unsigned Value; // This may have to change for non-numeric values.
75     HintKind Kind;
76 
77     Hint(const char *Name, unsigned Value, HintKind Kind)
78         : Name(Name), Value(Value), Kind(Kind) {}
79 
80     bool validate(unsigned Val);
81   };
82 
83   /// Vectorization width.
84   Hint Width;
85 
86   /// Vectorization interleave factor.
87   Hint Interleave;
88 
89   /// Vectorization forced
90   Hint Force;
91 
92   /// Already Vectorized
93   Hint IsVectorized;
94 
95   /// Vector Predicate
96   Hint Predicate;
97 
98   /// Says whether we should use fixed width or scalable vectorization.
99   Hint Scalable;
100 
101   /// Return the loop metadata prefix.
102   static StringRef Prefix() { return "llvm.loop."; }
103 
104   /// True if there is any unsafe math in the loop.
105   bool PotentiallyUnsafe = false;
106 
107 public:
108   enum ForceKind {
109     FK_Undefined = -1, ///< Not selected.
110     FK_Disabled = 0,   ///< Forcing disabled.
111     FK_Enabled = 1,    ///< Forcing enabled.
112   };
113 
114   enum ScalableForceKind {
115     /// Not selected.
116     SK_Unspecified = -1,
117     /// Disables vectorization with scalable vectors.
118     SK_FixedWidthOnly = 0,
119     /// Vectorize loops using scalable vectors or fixed-width vectors, but favor
120     /// scalable vectors when the cost-model is inconclusive. This is the
121     /// default when the scalable.enable hint is enabled through a pragma.
122     SK_PreferScalable = 1
123   };
124 
125   LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
126                      OptimizationRemarkEmitter &ORE,
127                      const TargetTransformInfo *TTI = nullptr);
128 
129   /// Mark the loop L as already vectorized by setting the width to 1.
130   void setAlreadyVectorized();
131 
132   bool allowVectorization(Function *F, Loop *L,
133                           bool VectorizeOnlyWhenForced) const;
134 
135   /// Dumps all the hint information.
136   void emitRemarkWithHints() const;
137 
138   ElementCount getWidth() const {
139     return ElementCount::get(Width.Value, (ScalableForceKind)Scalable.Value ==
140                                               SK_PreferScalable);
141   }
142 
143   unsigned getInterleave() const {
144     if (Interleave.Value)
145       return Interleave.Value;
146     // If interleaving is not explicitly set, assume that if we do not want
147     // unrolling, we also don't want any interleaving.
148     if (llvm::hasUnrollTransformation(TheLoop) & TM_Disable)
149       return 1;
150     return 0;
151   }
152   unsigned getIsVectorized() const { return IsVectorized.Value; }
153   unsigned getPredicate() const { return Predicate.Value; }
154   enum ForceKind getForce() const {
155     if ((ForceKind)Force.Value == FK_Undefined &&
156         hasDisableAllTransformsHint(TheLoop))
157       return FK_Disabled;
158     return (ForceKind)Force.Value;
159   }
160 
161   /// \return true if scalable vectorization has been explicitly disabled.
162   bool isScalableVectorizationDisabled() const {
163     return (ScalableForceKind)Scalable.Value == SK_FixedWidthOnly;
164   }
165 
166   /// If hints are provided that force vectorization, use the AlwaysPrint
167   /// pass name to force the frontend to print the diagnostic.
168   const char *vectorizeAnalysisPassName() const;
169 
170   /// When enabling loop hints are provided we allow the vectorizer to change
171   /// the order of operations that is given by the scalar loop. This is not
172   /// enabled by default because can be unsafe or inefficient. For example,
173   /// reordering floating-point operations will change the way round-off
174   /// error accumulates in the loop.
175   bool allowReordering() const;
176 
177   bool isPotentiallyUnsafe() const {
178     // Avoid FP vectorization if the target is unsure about proper support.
179     // This may be related to the SIMD unit in the target not handling
180     // IEEE 754 FP ops properly, or bad single-to-double promotions.
181     // Otherwise, a sequence of vectorized loops, even without reduction,
182     // could lead to different end results on the destination vectors.
183     return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
184   }
185 
186   void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
187 
188 private:
189   /// Find hints specified in the loop metadata and update local values.
190   void getHintsFromMetadata();
191 
192   /// Checks string hint with one operand and set value if valid.
193   void setHint(StringRef Name, Metadata *Arg);
194 
195   /// The loop these hints belong to.
196   const Loop *TheLoop;
197 
198   /// Interface to emit optimization remarks.
199   OptimizationRemarkEmitter &ORE;
200 };
201 
202 /// This holds vectorization requirements that must be verified late in
203 /// the process. The requirements are set by legalize and costmodel. Once
204 /// vectorization has been determined to be possible and profitable the
205 /// requirements can be verified by looking for metadata or compiler options.
206 /// For example, some loops require FP commutativity which is only allowed if
207 /// vectorization is explicitly specified or if the fast-math compiler option
208 /// has been provided.
209 /// Late evaluation of these requirements allows helpful diagnostics to be
210 /// composed that tells the user what need to be done to vectorize the loop. For
211 /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
212 /// evaluation should be used only when diagnostics can generated that can be
213 /// followed by a non-expert user.
214 class LoopVectorizationRequirements {
215 public:
216   /// Track the 1st floating-point instruction that can not be reassociated.
217   void addExactFPMathInst(Instruction *I) {
218     if (I && !ExactFPMathInst)
219       ExactFPMathInst = I;
220   }
221 
222   Instruction *getExactFPInst() { return ExactFPMathInst; }
223 
224 private:
225   Instruction *ExactFPMathInst = nullptr;
226 };
227 
228 /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
229 /// to what vectorization factor.
230 /// This class does not look at the profitability of vectorization, only the
231 /// legality. This class has two main kinds of checks:
232 /// * Memory checks - The code in canVectorizeMemory checks if vectorization
233 ///   will change the order of memory accesses in a way that will change the
234 ///   correctness of the program.
235 /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
236 /// checks for a number of different conditions, such as the availability of a
237 /// single induction variable, that all types are supported and vectorize-able,
238 /// etc. This code reflects the capabilities of InnerLoopVectorizer.
239 /// This class is also used by InnerLoopVectorizer for identifying
240 /// induction variable and the different reduction variables.
241 class LoopVectorizationLegality {
242 public:
243   LoopVectorizationLegality(
244       Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
245       TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AAResults *AA,
246       Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA,
247       LoopInfo *LI, OptimizationRemarkEmitter *ORE,
248       LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
249       AssumptionCache *AC, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
250       : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT),
251         GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC),
252         BFI(BFI), PSI(PSI) {}
253 
254   /// ReductionList contains the reduction descriptors for all
255   /// of the reductions that were found in the loop.
256   using ReductionList = MapVector<PHINode *, RecurrenceDescriptor>;
257 
258   /// InductionList saves induction variables and maps them to the
259   /// induction descriptor.
260   using InductionList = MapVector<PHINode *, InductionDescriptor>;
261 
262   /// RecurrenceSet contains the phi nodes that are recurrences other than
263   /// inductions and reductions.
264   using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
265 
266   /// Returns true if it is legal to vectorize this loop.
267   /// This does not mean that it is profitable to vectorize this
268   /// loop, only that it is legal to do so.
269   /// Temporarily taking UseVPlanNativePath parameter. If true, take
270   /// the new code path being implemented for outer loop vectorization
271   /// (should be functional for inner loop vectorization) based on VPlan.
272   /// If false, good old LV code.
273   bool canVectorize(bool UseVPlanNativePath);
274 
275   /// Returns true if it is legal to vectorize the FP math operations in this
276   /// loop. Vectorizing is legal if we allow reordering of FP operations, or if
277   /// we can use in-order reductions.
278   bool canVectorizeFPMath(bool EnableStrictReductions);
279 
280   /// Return true if we can vectorize this loop while folding its tail by
281   /// masking, and mark all respective loads/stores for masking.
282   /// This object's state is only modified iff this function returns true.
283   bool prepareToFoldTailByMasking();
284 
285   /// Returns the primary induction variable.
286   PHINode *getPrimaryInduction() { return PrimaryInduction; }
287 
288   /// Returns the reduction variables found in the loop.
289   const ReductionList &getReductionVars() const { return Reductions; }
290 
291   /// Returns the induction variables found in the loop.
292   const InductionList &getInductionVars() const { return Inductions; }
293 
294   /// Return the first-order recurrences found in the loop.
295   RecurrenceSet &getFirstOrderRecurrences() { return FirstOrderRecurrences; }
296 
297   /// Return the set of instructions to sink to handle first-order recurrences.
298   MapVector<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
299 
300   /// Returns the widest induction type.
301   Type *getWidestInductionType() { return WidestIndTy; }
302 
303   /// Returns True if given store is a final invariant store of one of the
304   /// reductions found in the loop.
305   bool isInvariantStoreOfReduction(StoreInst *SI);
306 
307   /// Returns True if given address is invariant and is used to store recurrent
308   /// expression
309   bool isInvariantAddressOfReduction(Value *V);
310 
311   /// Returns True if V is a Phi node of an induction variable in this loop.
312   bool isInductionPhi(const Value *V) const;
313 
314   /// Returns a pointer to the induction descriptor, if \p Phi is an integer or
315   /// floating point induction.
316   const InductionDescriptor *getIntOrFpInductionDescriptor(PHINode *Phi) const;
317 
318   /// Returns a pointer to the induction descriptor, if \p Phi is pointer
319   /// induction.
320   const InductionDescriptor *getPointerInductionDescriptor(PHINode *Phi) const;
321 
322   /// Returns True if V is a cast that is part of an induction def-use chain,
323   /// and had been proven to be redundant under a runtime guard (in other
324   /// words, the cast has the same SCEV expression as the induction phi).
325   bool isCastedInductionVariable(const Value *V) const;
326 
327   /// Returns True if V can be considered as an induction variable in this
328   /// loop. V can be the induction phi, or some redundant cast in the def-use
329   /// chain of the inducion phi.
330   bool isInductionVariable(const Value *V) const;
331 
332   /// Returns True if PN is a reduction variable in this loop.
333   bool isReductionVariable(PHINode *PN) const { return Reductions.count(PN); }
334 
335   /// Returns True if Phi is a first-order recurrence in this loop.
336   bool isFirstOrderRecurrence(const PHINode *Phi) const;
337 
338   /// Return true if the block BB needs to be predicated in order for the loop
339   /// to be vectorized.
340   bool blockNeedsPredication(BasicBlock *BB) const;
341 
342   /// Check if this pointer is consecutive when vectorizing. This happens
343   /// when the last index of the GEP is the induction variable, or that the
344   /// pointer itself is an induction variable.
345   /// This check allows us to vectorize A[idx] into a wide load/store.
346   /// Returns:
347   /// 0 - Stride is unknown or non-consecutive.
348   /// 1 - Address is consecutive.
349   /// -1 - Address is consecutive, and decreasing.
350   /// NOTE: This method must only be used before modifying the original scalar
351   /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
352   int isConsecutivePtr(Type *AccessTy, Value *Ptr) const;
353 
354   /// Returns true if the value V is uniform within the loop.
355   bool isUniform(Value *V);
356 
357   /// A uniform memory op is a load or store which accesses the same memory
358   /// location on all lanes.
359   bool isUniformMemOp(Instruction &I) {
360     Value *Ptr = getLoadStorePointerOperand(&I);
361     if (!Ptr)
362       return false;
363     // Note: There's nothing inherent which prevents predicated loads and
364     // stores from being uniform.  The current lowering simply doesn't handle
365     // it; in particular, the cost model distinguishes scatter/gather from
366     // scalar w/predication, and we currently rely on the scalar path.
367     return isUniform(Ptr) && !blockNeedsPredication(I.getParent());
368   }
369 
370   /// Returns the information that we collected about runtime memory check.
371   const RuntimePointerChecking *getRuntimePointerChecking() const {
372     return LAI->getRuntimePointerChecking();
373   }
374 
375   const LoopAccessInfo *getLAI() const { return LAI; }
376 
377   bool isSafeForAnyVectorWidth() const {
378     return LAI->getDepChecker().isSafeForAnyVectorWidth();
379   }
380 
381   unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
382 
383   uint64_t getMaxSafeVectorWidthInBits() const {
384     return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
385   }
386 
387   bool hasStride(Value *V) { return LAI->hasStride(V); }
388 
389   /// Returns true if vector representation of the instruction \p I
390   /// requires mask.
391   bool isMaskRequired(const Instruction *I) const {
392     return MaskedOp.contains(I);
393   }
394 
395   unsigned getNumStores() const { return LAI->getNumStores(); }
396   unsigned getNumLoads() const { return LAI->getNumLoads(); }
397 
398   /// Returns all assume calls in predicated blocks. They need to be dropped
399   /// when flattening the CFG.
400   const SmallPtrSetImpl<Instruction *> &getConditionalAssumes() const {
401     return ConditionalAssumes;
402   }
403 
404 private:
405   /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
406   /// its nested loops are considered legal for vectorization. These legal
407   /// checks are common for inner and outer loop vectorization.
408   /// Temporarily taking UseVPlanNativePath parameter. If true, take
409   /// the new code path being implemented for outer loop vectorization
410   /// (should be functional for inner loop vectorization) based on VPlan.
411   /// If false, good old LV code.
412   bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
413 
414   /// Set up outer loop inductions by checking Phis in outer loop header for
415   /// supported inductions (int inductions). Return false if any of these Phis
416   /// is not a supported induction or if we fail to find an induction.
417   bool setupOuterLoopInductions();
418 
419   /// Return true if the pre-header, exiting and latch blocks of \p Lp
420   /// (non-recursive) are considered legal for vectorization.
421   /// Temporarily taking UseVPlanNativePath parameter. If true, take
422   /// the new code path being implemented for outer loop vectorization
423   /// (should be functional for inner loop vectorization) based on VPlan.
424   /// If false, good old LV code.
425   bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath);
426 
427   /// Check if a single basic block loop is vectorizable.
428   /// At this point we know that this is a loop with a constant trip count
429   /// and we only need to check individual instructions.
430   bool canVectorizeInstrs();
431 
432   /// When we vectorize loops we may change the order in which
433   /// we read and write from memory. This method checks if it is
434   /// legal to vectorize the code, considering only memory constrains.
435   /// Returns true if the loop is vectorizable
436   bool canVectorizeMemory();
437 
438   /// Return true if we can vectorize this loop using the IF-conversion
439   /// transformation.
440   bool canVectorizeWithIfConvert();
441 
442   /// Return true if we can vectorize this outer loop. The method performs
443   /// specific checks for outer loop vectorization.
444   bool canVectorizeOuterLoop();
445 
446   /// Return true if all of the instructions in the block can be speculatively
447   /// executed, and record the loads/stores that require masking.
448   /// \p SafePtrs is a list of addresses that are known to be legal and we know
449   /// that we can read from them without segfault.
450   /// \p MaskedOp is a list of instructions that have to be transformed into
451   /// calls to the appropriate masked intrinsic when the loop is vectorized.
452   /// \p ConditionalAssumes is a list of assume instructions in predicated
453   /// blocks that must be dropped if the CFG gets flattened.
454   bool blockCanBePredicated(
455       BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
456       SmallPtrSetImpl<const Instruction *> &MaskedOp,
457       SmallPtrSetImpl<Instruction *> &ConditionalAssumes) const;
458 
459   /// Updates the vectorization state by adding \p Phi to the inductions list.
460   /// This can set \p Phi as the main induction of the loop if \p Phi is a
461   /// better choice for the main induction than the existing one.
462   void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
463                        SmallPtrSetImpl<Value *> &AllowedExit);
464 
465   /// If an access has a symbolic strides, this maps the pointer value to
466   /// the stride symbol.
467   const ValueToValueMap *getSymbolicStrides() const {
468     // FIXME: Currently, the set of symbolic strides is sometimes queried before
469     // it's collected.  This happens from canVectorizeWithIfConvert, when the
470     // pointer is checked to reference consecutive elements suitable for a
471     // masked access.
472     return LAI ? &LAI->getSymbolicStrides() : nullptr;
473   }
474 
475   /// The loop that we evaluate.
476   Loop *TheLoop;
477 
478   /// Loop Info analysis.
479   LoopInfo *LI;
480 
481   /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
482   /// Applies dynamic knowledge to simplify SCEV expressions in the context
483   /// of existing SCEV assumptions. The analysis will also add a minimal set
484   /// of new predicates if this is required to enable vectorization and
485   /// unrolling.
486   PredicatedScalarEvolution &PSE;
487 
488   /// Target Transform Info.
489   TargetTransformInfo *TTI;
490 
491   /// Target Library Info.
492   TargetLibraryInfo *TLI;
493 
494   /// Dominator Tree.
495   DominatorTree *DT;
496 
497   // LoopAccess analysis.
498   std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
499 
500   // And the loop-accesses info corresponding to this loop.  This pointer is
501   // null until canVectorizeMemory sets it up.
502   const LoopAccessInfo *LAI = nullptr;
503 
504   /// Interface to emit optimization remarks.
505   OptimizationRemarkEmitter *ORE;
506 
507   //  ---  vectorization state --- //
508 
509   /// Holds the primary induction variable. This is the counter of the
510   /// loop.
511   PHINode *PrimaryInduction = nullptr;
512 
513   /// Holds the reduction variables.
514   ReductionList Reductions;
515 
516   /// Holds all of the induction variables that we found in the loop.
517   /// Notice that inductions don't need to start at zero and that induction
518   /// variables can be pointers.
519   InductionList Inductions;
520 
521   /// Holds all the casts that participate in the update chain of the induction
522   /// variables, and that have been proven to be redundant (possibly under a
523   /// runtime guard). These casts can be ignored when creating the vectorized
524   /// loop body.
525   SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
526 
527   /// Holds the phi nodes that are first-order recurrences.
528   RecurrenceSet FirstOrderRecurrences;
529 
530   /// Holds instructions that need to sink past other instructions to handle
531   /// first-order recurrences.
532   MapVector<Instruction *, Instruction *> SinkAfter;
533 
534   /// Holds the widest induction type encountered.
535   Type *WidestIndTy = nullptr;
536 
537   /// Allowed outside users. This holds the variables that can be accessed from
538   /// outside the loop.
539   SmallPtrSet<Value *, 4> AllowedExit;
540 
541   /// Vectorization requirements that will go through late-evaluation.
542   LoopVectorizationRequirements *Requirements;
543 
544   /// Used to emit an analysis of any legality issues.
545   LoopVectorizeHints *Hints;
546 
547   /// The demanded bits analysis is used to compute the minimum type size in
548   /// which a reduction can be computed.
549   DemandedBits *DB;
550 
551   /// The assumption cache analysis is used to compute the minimum type size in
552   /// which a reduction can be computed.
553   AssumptionCache *AC;
554 
555   /// While vectorizing these instructions we have to generate a
556   /// call to the appropriate masked intrinsic
557   SmallPtrSet<const Instruction *, 8> MaskedOp;
558 
559   /// Assume instructions in predicated blocks must be dropped if the CFG gets
560   /// flattened.
561   SmallPtrSet<Instruction *, 8> ConditionalAssumes;
562 
563   /// BFI and PSI are used to check for profile guided size optimizations.
564   BlockFrequencyInfo *BFI;
565   ProfileSummaryInfo *PSI;
566 };
567 
568 } // namespace llvm
569 
570 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
571