1 //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file defines the LoopVectorizationLegality class. Original code
11 /// in Loop Vectorizer has been moved out to its own file for modularity
12 /// and reusability.
13 ///
14 /// Currently, it works for innermost loop vectorization. Extending this to
15 /// outer loop vectorization is a TODO item.
16 ///
17 /// Also provides:
18 /// 1) LoopVectorizeHints class which keeps a number of loop annotations
19 /// locally for easy look up. It has the ability to write them back as
20 /// loop metadata, upon request.
21 /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
22 /// of reporting useful failure to vectorize message.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
27 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
28 
29 #include "llvm/ADT/MapVector.h"
30 #include "llvm/Analysis/LoopAccessAnalysis.h"
31 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
32 #include "llvm/Transforms/Utils/LoopUtils.h"
33 
34 namespace llvm {
35 
36 /// Utility class for getting and setting loop vectorizer hints in the form
37 /// of loop metadata.
38 /// This class keeps a number of loop annotations locally (as member variables)
39 /// and can, upon request, write them back as metadata on the loop. It will
40 /// initially scan the loop for existing metadata, and will update the local
41 /// values based on information in the loop.
42 /// We cannot write all values to metadata, as the mere presence of some info,
43 /// for example 'force', means a decision has been made. So, we need to be
44 /// careful NOT to add them if the user hasn't specifically asked so.
45 class LoopVectorizeHints {
46   enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED,
47                   HK_PREDICATE };
48 
49   /// Hint - associates name and validation with the hint value.
50   struct Hint {
51     const char *Name;
52     unsigned Value; // This may have to change for non-numeric values.
53     HintKind Kind;
54 
55     Hint(const char *Name, unsigned Value, HintKind Kind)
56         : Name(Name), Value(Value), Kind(Kind) {}
57 
58     bool validate(unsigned Val);
59   };
60 
61   /// Vectorization width.
62   Hint Width;
63 
64   /// Vectorization interleave factor.
65   Hint Interleave;
66 
67   /// Vectorization forced
68   Hint Force;
69 
70   /// Already Vectorized
71   Hint IsVectorized;
72 
73   /// Vector Predicate
74   Hint Predicate;
75 
76   /// Return the loop metadata prefix.
77   static StringRef Prefix() { return "llvm.loop."; }
78 
79   /// True if there is any unsafe math in the loop.
80   bool PotentiallyUnsafe = false;
81 
82 public:
83   enum ForceKind {
84     FK_Undefined = -1, ///< Not selected.
85     FK_Disabled = 0,   ///< Forcing disabled.
86     FK_Enabled = 1,    ///< Forcing enabled.
87   };
88 
89   LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
90                      OptimizationRemarkEmitter &ORE);
91 
92   /// Mark the loop L as already vectorized by setting the width to 1.
93   void setAlreadyVectorized();
94 
95   bool allowVectorization(Function *F, Loop *L,
96                           bool VectorizeOnlyWhenForced) const;
97 
98   /// Dumps all the hint information.
99   void emitRemarkWithHints() const;
100 
101   unsigned getWidth() const { return Width.Value; }
102   unsigned getInterleave() const { return Interleave.Value; }
103   unsigned getIsVectorized() const { return IsVectorized.Value; }
104   unsigned getPredicate() const { return Predicate.Value; }
105   enum ForceKind getForce() const {
106     if ((ForceKind)Force.Value == FK_Undefined &&
107         hasDisableAllTransformsHint(TheLoop))
108       return FK_Disabled;
109     return (ForceKind)Force.Value;
110   }
111 
112   /// If hints are provided that force vectorization, use the AlwaysPrint
113   /// pass name to force the frontend to print the diagnostic.
114   const char *vectorizeAnalysisPassName() const;
115 
116   bool allowReordering() const {
117     // When enabling loop hints are provided we allow the vectorizer to change
118     // the order of operations that is given by the scalar loop. This is not
119     // enabled by default because can be unsafe or inefficient. For example,
120     // reordering floating-point operations will change the way round-off
121     // error accumulates in the loop.
122     return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
123   }
124 
125   bool isPotentiallyUnsafe() const {
126     // Avoid FP vectorization if the target is unsure about proper support.
127     // This may be related to the SIMD unit in the target not handling
128     // IEEE 754 FP ops properly, or bad single-to-double promotions.
129     // Otherwise, a sequence of vectorized loops, even without reduction,
130     // could lead to different end results on the destination vectors.
131     return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
132   }
133 
134   void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
135 
136 private:
137   /// Find hints specified in the loop metadata and update local values.
138   void getHintsFromMetadata();
139 
140   /// Checks string hint with one operand and set value if valid.
141   void setHint(StringRef Name, Metadata *Arg);
142 
143   /// The loop these hints belong to.
144   const Loop *TheLoop;
145 
146   /// Interface to emit optimization remarks.
147   OptimizationRemarkEmitter &ORE;
148 };
149 
150 /// This holds vectorization requirements that must be verified late in
151 /// the process. The requirements are set by legalize and costmodel. Once
152 /// vectorization has been determined to be possible and profitable the
153 /// requirements can be verified by looking for metadata or compiler options.
154 /// For example, some loops require FP commutativity which is only allowed if
155 /// vectorization is explicitly specified or if the fast-math compiler option
156 /// has been provided.
157 /// Late evaluation of these requirements allows helpful diagnostics to be
158 /// composed that tells the user what need to be done to vectorize the loop. For
159 /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
160 /// evaluation should be used only when diagnostics can generated that can be
161 /// followed by a non-expert user.
162 class LoopVectorizationRequirements {
163 public:
164   LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {}
165 
166   void addUnsafeAlgebraInst(Instruction *I) {
167     // First unsafe algebra instruction.
168     if (!UnsafeAlgebraInst)
169       UnsafeAlgebraInst = I;
170   }
171 
172   void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
173 
174   bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints);
175 
176 private:
177   unsigned NumRuntimePointerChecks = 0;
178   Instruction *UnsafeAlgebraInst = nullptr;
179 
180   /// Interface to emit optimization remarks.
181   OptimizationRemarkEmitter &ORE;
182 };
183 
184 /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
185 /// to what vectorization factor.
186 /// This class does not look at the profitability of vectorization, only the
187 /// legality. This class has two main kinds of checks:
188 /// * Memory checks - The code in canVectorizeMemory checks if vectorization
189 ///   will change the order of memory accesses in a way that will change the
190 ///   correctness of the program.
191 /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
192 /// checks for a number of different conditions, such as the availability of a
193 /// single induction variable, that all types are supported and vectorize-able,
194 /// etc. This code reflects the capabilities of InnerLoopVectorizer.
195 /// This class is also used by InnerLoopVectorizer for identifying
196 /// induction variable and the different reduction variables.
197 class LoopVectorizationLegality {
198 public:
199   LoopVectorizationLegality(
200       Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
201       TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AAResults *AA,
202       Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA,
203       LoopInfo *LI, OptimizationRemarkEmitter *ORE,
204       LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
205       AssumptionCache *AC)
206       : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT),
207         GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
208 
209   /// ReductionList contains the reduction descriptors for all
210   /// of the reductions that were found in the loop.
211   using ReductionList = MapVector<PHINode *, RecurrenceDescriptor>;
212 
213   /// InductionList saves induction variables and maps them to the
214   /// induction descriptor.
215   using InductionList = MapVector<PHINode *, InductionDescriptor>;
216 
217   /// RecurrenceSet contains the phi nodes that are recurrences other than
218   /// inductions and reductions.
219   using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
220 
221   /// Returns true if it is legal to vectorize this loop.
222   /// This does not mean that it is profitable to vectorize this
223   /// loop, only that it is legal to do so.
224   /// Temporarily taking UseVPlanNativePath parameter. If true, take
225   /// the new code path being implemented for outer loop vectorization
226   /// (should be functional for inner loop vectorization) based on VPlan.
227   /// If false, good old LV code.
228   bool canVectorize(bool UseVPlanNativePath);
229 
230   /// Return true if we can vectorize this loop while folding its tail by
231   /// masking, and mark all respective loads/stores for masking.
232   bool prepareToFoldTailByMasking();
233 
234   /// Returns the primary induction variable.
235   PHINode *getPrimaryInduction() { return PrimaryInduction; }
236 
237   /// Returns the reduction variables found in the loop.
238   ReductionList &getReductionVars() { return Reductions; }
239 
240   /// Returns the induction variables found in the loop.
241   InductionList &getInductionVars() { return Inductions; }
242 
243   /// Return the first-order recurrences found in the loop.
244   RecurrenceSet &getFirstOrderRecurrences() { return FirstOrderRecurrences; }
245 
246   /// Return the set of instructions to sink to handle first-order recurrences.
247   DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
248 
249   /// Returns the widest induction type.
250   Type *getWidestInductionType() { return WidestIndTy; }
251 
252   /// Returns True if V is a Phi node of an induction variable in this loop.
253   bool isInductionPhi(const Value *V);
254 
255   /// Returns True if V is a cast that is part of an induction def-use chain,
256   /// and had been proven to be redundant under a runtime guard (in other
257   /// words, the cast has the same SCEV expression as the induction phi).
258   bool isCastedInductionVariable(const Value *V);
259 
260   /// Returns True if V can be considered as an induction variable in this
261   /// loop. V can be the induction phi, or some redundant cast in the def-use
262   /// chain of the inducion phi.
263   bool isInductionVariable(const Value *V);
264 
265   /// Returns True if PN is a reduction variable in this loop.
266   bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
267 
268   /// Returns True if Phi is a first-order recurrence in this loop.
269   bool isFirstOrderRecurrence(const PHINode *Phi);
270 
271   /// Return true if the block BB needs to be predicated in order for the loop
272   /// to be vectorized.
273   bool blockNeedsPredication(BasicBlock *BB);
274 
275   /// Check if this pointer is consecutive when vectorizing. This happens
276   /// when the last index of the GEP is the induction variable, or that the
277   /// pointer itself is an induction variable.
278   /// This check allows us to vectorize A[idx] into a wide load/store.
279   /// Returns:
280   /// 0 - Stride is unknown or non-consecutive.
281   /// 1 - Address is consecutive.
282   /// -1 - Address is consecutive, and decreasing.
283   /// NOTE: This method must only be used before modifying the original scalar
284   /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
285   int isConsecutivePtr(Value *Ptr);
286 
287   /// Returns true if the value V is uniform within the loop.
288   bool isUniform(Value *V);
289 
290   /// Returns the information that we collected about runtime memory check.
291   const RuntimePointerChecking *getRuntimePointerChecking() const {
292     return LAI->getRuntimePointerChecking();
293   }
294 
295   const LoopAccessInfo *getLAI() const { return LAI; }
296 
297   unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
298 
299   uint64_t getMaxSafeRegisterWidth() const {
300     return LAI->getDepChecker().getMaxSafeRegisterWidth();
301   }
302 
303   bool hasStride(Value *V) { return LAI->hasStride(V); }
304 
305   /// Returns true if vector representation of the instruction \p I
306   /// requires mask.
307   bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
308 
309   unsigned getNumStores() const { return LAI->getNumStores(); }
310   unsigned getNumLoads() const { return LAI->getNumLoads(); }
311 
312   // Returns true if the NoNaN attribute is set on the function.
313   bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; }
314 
315   /// Returns all assume calls in predicated blocks. They need to be dropped
316   /// when flattening the CFG.
317   const SmallPtrSetImpl<Instruction *> &getConditionalAssumes() const {
318     return ConditionalAssumes;
319   }
320 
321 private:
322   /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
323   /// its nested loops are considered legal for vectorization. These legal
324   /// checks are common for inner and outer loop vectorization.
325   /// Temporarily taking UseVPlanNativePath parameter. If true, take
326   /// the new code path being implemented for outer loop vectorization
327   /// (should be functional for inner loop vectorization) based on VPlan.
328   /// If false, good old LV code.
329   bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
330 
331   /// Set up outer loop inductions by checking Phis in outer loop header for
332   /// supported inductions (int inductions). Return false if any of these Phis
333   /// is not a supported induction or if we fail to find an induction.
334   bool setupOuterLoopInductions();
335 
336   /// Return true if the pre-header, exiting and latch blocks of \p Lp
337   /// (non-recursive) are considered legal for vectorization.
338   /// Temporarily taking UseVPlanNativePath parameter. If true, take
339   /// the new code path being implemented for outer loop vectorization
340   /// (should be functional for inner loop vectorization) based on VPlan.
341   /// If false, good old LV code.
342   bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath);
343 
344   /// Check if a single basic block loop is vectorizable.
345   /// At this point we know that this is a loop with a constant trip count
346   /// and we only need to check individual instructions.
347   bool canVectorizeInstrs();
348 
349   /// When we vectorize loops we may change the order in which
350   /// we read and write from memory. This method checks if it is
351   /// legal to vectorize the code, considering only memory constrains.
352   /// Returns true if the loop is vectorizable
353   bool canVectorizeMemory();
354 
355   /// Return true if we can vectorize this loop using the IF-conversion
356   /// transformation.
357   bool canVectorizeWithIfConvert();
358 
359   /// Return true if we can vectorize this outer loop. The method performs
360   /// specific checks for outer loop vectorization.
361   bool canVectorizeOuterLoop();
362 
363   /// Return true if all of the instructions in the block can be speculatively
364   /// executed, and record the loads/stores that require masking. If's that
365   /// guard loads can be ignored under "assume safety" unless \p PreserveGuards
366   /// is true. This can happen when we introduces guards for which the original
367   /// "unguarded-loads are safe" assumption does not hold. For example, the
368   /// vectorizer's fold-tail transformation changes the loop to execute beyond
369   /// its original trip-count, under a proper guard, which should be preserved.
370   /// \p SafePtrs is a list of addresses that are known to be legal and we know
371   /// that we can read from them without segfault.
372   bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
373                             bool PreserveGuards = false);
374 
375   /// Updates the vectorization state by adding \p Phi to the inductions list.
376   /// This can set \p Phi as the main induction of the loop if \p Phi is a
377   /// better choice for the main induction than the existing one.
378   void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
379                        SmallPtrSetImpl<Value *> &AllowedExit);
380 
381   /// If an access has a symbolic strides, this maps the pointer value to
382   /// the stride symbol.
383   const ValueToValueMap *getSymbolicStrides() {
384     // FIXME: Currently, the set of symbolic strides is sometimes queried before
385     // it's collected.  This happens from canVectorizeWithIfConvert, when the
386     // pointer is checked to reference consecutive elements suitable for a
387     // masked access.
388     return LAI ? &LAI->getSymbolicStrides() : nullptr;
389   }
390 
391   /// The loop that we evaluate.
392   Loop *TheLoop;
393 
394   /// Loop Info analysis.
395   LoopInfo *LI;
396 
397   /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
398   /// Applies dynamic knowledge to simplify SCEV expressions in the context
399   /// of existing SCEV assumptions. The analysis will also add a minimal set
400   /// of new predicates if this is required to enable vectorization and
401   /// unrolling.
402   PredicatedScalarEvolution &PSE;
403 
404   /// Target Transform Info.
405   TargetTransformInfo *TTI;
406 
407   /// Target Library Info.
408   TargetLibraryInfo *TLI;
409 
410   /// Dominator Tree.
411   DominatorTree *DT;
412 
413   // LoopAccess analysis.
414   std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
415 
416   // And the loop-accesses info corresponding to this loop.  This pointer is
417   // null until canVectorizeMemory sets it up.
418   const LoopAccessInfo *LAI = nullptr;
419 
420   /// Interface to emit optimization remarks.
421   OptimizationRemarkEmitter *ORE;
422 
423   //  ---  vectorization state --- //
424 
425   /// Holds the primary induction variable. This is the counter of the
426   /// loop.
427   PHINode *PrimaryInduction = nullptr;
428 
429   /// Holds the reduction variables.
430   ReductionList Reductions;
431 
432   /// Holds all of the induction variables that we found in the loop.
433   /// Notice that inductions don't need to start at zero and that induction
434   /// variables can be pointers.
435   InductionList Inductions;
436 
437   /// Holds all the casts that participate in the update chain of the induction
438   /// variables, and that have been proven to be redundant (possibly under a
439   /// runtime guard). These casts can be ignored when creating the vectorized
440   /// loop body.
441   SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
442 
443   /// Holds the phi nodes that are first-order recurrences.
444   RecurrenceSet FirstOrderRecurrences;
445 
446   /// Holds instructions that need to sink past other instructions to handle
447   /// first-order recurrences.
448   DenseMap<Instruction *, Instruction *> SinkAfter;
449 
450   /// Holds the widest induction type encountered.
451   Type *WidestIndTy = nullptr;
452 
453   /// Allowed outside users. This holds the variables that can be accessed from
454   /// outside the loop.
455   SmallPtrSet<Value *, 4> AllowedExit;
456 
457   /// Can we assume the absence of NaNs.
458   bool HasFunNoNaNAttr = false;
459 
460   /// Vectorization requirements that will go through late-evaluation.
461   LoopVectorizationRequirements *Requirements;
462 
463   /// Used to emit an analysis of any legality issues.
464   LoopVectorizeHints *Hints;
465 
466   /// The demanded bits analysis is used to compute the minimum type size in
467   /// which a reduction can be computed.
468   DemandedBits *DB;
469 
470   /// The assumption cache analysis is used to compute the minimum type size in
471   /// which a reduction can be computed.
472   AssumptionCache *AC;
473 
474   /// While vectorizing these instructions we have to generate a
475   /// call to the appropriate masked intrinsic
476   SmallPtrSet<const Instruction *, 8> MaskedOp;
477 
478   /// Assume instructions in predicated blocks must be dropped if the CFG gets
479   /// flattened.
480   SmallPtrSet<Instruction *, 8> ConditionalAssumes;
481 };
482 
483 } // namespace llvm
484 
485 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
486