//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the declarations of the Vectorization Plan base classes:
/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
///    VPBlockBase, together implementing a Hierarchical CFG;
/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
///    within VPBasicBlocks;
/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
///    also inherit from VPValue;
/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
///    instruction;
/// 5. The VPlan class holding a candidate for vectorization;
/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
/// These are documented in docs/VectorizationPlan.rst.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H

#include "VPlanAnalysis.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Operator.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <string>

namespace llvm {

class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class IRBuilderBase;
class LoopInfo;
class raw_ostream;
class RecurrenceDescriptor;
class SCEV;
class Type;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
class VPReplicateRecipe;
class VPlanSlp;
class Value;
class LoopVersioning;

namespace Intrinsic {
typedef unsigned ID;
}

/// Returns a calculation for the total number of elements for a given \p VF.
/// For fixed width vectors this value is a constant, whereas for scalable
/// vectors it is an expression determined at runtime.
Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);

/// Return a value for Step multiplied by VF.
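/// For example (an illustrative sketch; \c Builder is an assumed
/// IRBuilderBase): for a scalable VF of <vscale x 4> and Step == 2 the
/// returned value is a runtime expression equivalent to (vscale * 4) * 2,
/// while for a fixed VF of 4 it folds to the constant 8:
/// \code
///   Value *StepVal = createStepForVF(Builder, Builder.getInt64Ty(),
///                                    ElementCount::getScalable(4), 2);
/// \endcode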
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
                       int64_t Step);

const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
                                Loop *CurLoop = nullptr);

/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.:
/// [1, 16) = {1, 2, 4, 8}
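/// Iterating the range visits each factor in turn (a minimal usage sketch):
/// \code
///   VFRange Range(ElementCount::getFixed(1), ElementCount::getFixed(16));
///   for (ElementCount VF : Range)
///     ; // visits 1, 2, 4 and 8.
/// \endcode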
struct VFRange {
  // A power of 2.
  const ElementCount Start;

  // A power of 2. If End <= Start, the range is empty.
  ElementCount End;

  bool isEmpty() const {
    return End.getKnownMinValue() <= Start.getKnownMinValue();
  }

  VFRange(const ElementCount &Start, const ElementCount &End)
      : Start(Start), End(End) {
    assert(Start.isScalable() == End.isScalable() &&
           "Both Start and End should have the same scalable flag");
    assert(isPowerOf2_32(Start.getKnownMinValue()) &&
           "Expected Start to be a power of 2");
    assert(isPowerOf2_32(End.getKnownMinValue()) &&
           "Expected End to be a power of 2");
  }

  /// Iterator to iterate over vectorization factors in a VFRange.
  class iterator
      : public iterator_facade_base<iterator, std::forward_iterator_tag,
                                    ElementCount> {
    ElementCount VF;

  public:
    iterator(ElementCount VF) : VF(VF) {}

    bool operator==(const iterator &Other) const { return VF == Other.VF; }

    ElementCount operator*() const { return VF; }

    iterator &operator++() {
      VF *= 2;
      return *this;
    }
  };

  iterator begin() { return iterator(Start); }
  iterator end() {
    assert(isPowerOf2_32(End.getKnownMinValue()));
    return iterator(End);
  }
};

using VPlanPtr = std::unique_ptr<VPlan>;

/// In what follows, the term "input IR" refers to code that is fed into the
/// vectorizer whereas the term "output IR" refers to code that is generated by
/// the vectorizer.

/// VPLane provides a way to access lanes in both fixed width and scalable
/// vectors, where for the latter the lane index sometimes needs calculating
/// as a runtime expression.
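/// For example (sketch): for a fixed VF of 4, getLastLaneForVF yields lane 3
/// of Kind::First; for a scalable VF of <vscale x 4> it yields offset 3 of
/// Kind::ScalableLast, i.e. the runtime lane (vscale - 1) * 4 + 3:
/// \code
///   VPLane L = VPLane::getLastLaneForVF(ElementCount::getScalable(4));
///   assert(L.getKind() == VPLane::Kind::ScalableLast);
/// \endcode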
class VPLane {
public:
  /// Kind describes how to interpret Lane.
  enum class Kind : uint8_t {
    /// For First, Lane is the index into the first N elements of a
    /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
    First,
    /// For ScalableLast, Lane is the offset from the start of the last
    /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
    /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
    /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
    ScalableLast
  };

private:
  /// in [0..VF)
  unsigned Lane;

  /// Indicates how the Lane should be interpreted, as described above.
  Kind LaneKind;

public:
  VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}

  static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }

  static VPLane getLastLaneForVF(const ElementCount &VF) {
    unsigned LaneOffset = VF.getKnownMinValue() - 1;
    Kind LaneKind;
    if (VF.isScalable())
      // In this case 'LaneOffset' refers to the offset from the start of the
      // last subvector with VF.getKnownMinValue() elements.
      LaneKind = VPLane::Kind::ScalableLast;
    else
      LaneKind = VPLane::Kind::First;
    return VPLane(LaneOffset, LaneKind);
  }

  /// Returns a compile-time known value for the lane index and asserts if the
  /// lane can only be calculated at runtime.
  unsigned getKnownLane() const {
    assert(LaneKind == Kind::First);
    return Lane;
  }

  /// Returns an expression describing the lane index that can be used at
  /// runtime.
  Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;

  /// Returns the Kind of lane offset.
  Kind getKind() const { return LaneKind; }

  /// Returns true if this is the first lane of the whole vector.
  bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }

  /// Maps the lane to a cache index based on \p VF.
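  /// For example (sketch), with VF = <vscale x 4>, lanes of Kind::First map
  /// to cache indices [0, 4) and lanes of Kind::ScalableLast to [4, 8):
  /// \code
  ///   VPLane::getFirstLane().mapToCacheIndex(VF);       // == 0
  ///   VPLane::getLastLaneForVF(VF).mapToCacheIndex(VF); // == 4 + 3 == 7
  /// \endcode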
  unsigned mapToCacheIndex(const ElementCount &VF) const {
    switch (LaneKind) {
    case VPLane::Kind::ScalableLast:
      assert(VF.isScalable() && Lane < VF.getKnownMinValue());
      return VF.getKnownMinValue() + Lane;
    default:
      assert(Lane < VF.getKnownMinValue());
      return Lane;
    }
  }

  /// Returns the maximum number of lanes that we are able to consider
  /// caching for \p VF.
  static unsigned getNumCachedLanes(const ElementCount &VF) {
    return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
  }
};

/// VPIteration represents a single point in the iteration space of the output
/// (vectorized and/or unrolled) IR loop.
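/// For example (sketch), the very last point of a plan unrolled by UF is:
/// \code
///   VPIteration Last(UF - 1, VPLane::getLastLaneForVF(VF));
/// \endcode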
struct VPIteration {
  /// in [0..UF)
  unsigned Part;

  VPLane Lane;

  VPIteration(unsigned Part, unsigned Lane,
              VPLane::Kind Kind = VPLane::Kind::First)
      : Part(Part), Lane(Lane, Kind) {}

  VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}

  bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
};

/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
  VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
                   DominatorTree *DT, IRBuilderBase &Builder,
                   InnerLoopVectorizer *ILV, VPlan *Plan, LLVMContext &Ctx)
      : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan),
        LVer(nullptr), TypeAnalysis(Ctx) {}

  /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
  ElementCount VF;
  unsigned UF;

  /// Hold the indices to generate specific scalar instructions. Null indicates
  /// that all instances are to be generated, using either scalar or vector
  /// instructions.
  std::optional<VPIteration> Instance;

  struct DataState {
    /// A type for vectorized values in the new loop. Each value from the
    /// original loop, when vectorized, is represented by UF vector values in
    /// the new unrolled loop, where UF is the unroll factor.
    typedef SmallVector<Value *, 2> PerPartValuesTy;

    DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;

    using ScalarsPerPartValuesTy = SmallVector<SmallVector<Value *, 4>, 2>;
    DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars;
  } Data;

  /// Get the generated Value for a given VPValue and a given Part. Note that
  /// as some Defs are still created by ILV and managed in its ValueMap, this
  /// method will delegate the call to ILV in such cases in order to provide
  /// callers a consistent API.
  /// \see set.
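  /// Usage sketch (hypothetical recipe code; Def, NewDef and Part are assumed
  /// to exist, and Def already has a vector value for Part):
  /// \code
  ///   Value *V = State.get(Def, Part);       // vector value for this part
  ///   Value *Sum = State.Builder.CreateAdd(V, V);
  ///   State.set(NewDef, Sum, Part);          // record the generated value
  /// \endcode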
  Value *get(VPValue *Def, unsigned Part);

  /// Get the generated Value for a given VPValue and given Part and Lane.
  Value *get(VPValue *Def, const VPIteration &Instance);

  bool hasVectorValue(VPValue *Def, unsigned Part) {
    auto I = Data.PerPartOutput.find(Def);
    return I != Data.PerPartOutput.end() && Part < I->second.size() &&
           I->second[Part];
  }

  bool hasScalarValue(VPValue *Def, VPIteration Instance) {
    auto I = Data.PerPartScalars.find(Def);
    if (I == Data.PerPartScalars.end())
      return false;
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    return Instance.Part < I->second.size() &&
           CacheIdx < I->second[Instance.Part].size() &&
           I->second[Instance.Part][CacheIdx];
  }

  /// Set the generated Value for a given VPValue and a given Part.
  void set(VPValue *Def, Value *V, unsigned Part) {
    if (!Data.PerPartOutput.count(Def)) {
      DataState::PerPartValuesTy Entry(UF);
      Data.PerPartOutput[Def] = Entry;
    }
    Data.PerPartOutput[Def][Part] = V;
  }
  /// Reset an existing vector value for \p Def and a given \p Part.
  void reset(VPValue *Def, Value *V, unsigned Part) {
    auto Iter = Data.PerPartOutput.find(Def);
    assert(Iter != Data.PerPartOutput.end() &&
           "need to overwrite existing value");
    Iter->second[Part] = V;
  }

  /// Set the generated scalar \p V for \p Def and the given \p Instance.
  void set(VPValue *Def, Value *V, const VPIteration &Instance) {
    auto Iter = Data.PerPartScalars.insert({Def, {}});
    auto &PerPartVec = Iter.first->second;
    while (PerPartVec.size() <= Instance.Part)
      PerPartVec.emplace_back();
    auto &Scalars = PerPartVec[Instance.Part];
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    while (Scalars.size() <= CacheIdx)
      Scalars.push_back(nullptr);
    assert(!Scalars[CacheIdx] && "should not overwrite existing value");
    Scalars[CacheIdx] = V;
  }

  /// Reset an existing scalar value for \p Def and a given \p Instance.
  void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
    auto Iter = Data.PerPartScalars.find(Def);
    assert(Iter != Data.PerPartScalars.end() &&
           "need to overwrite existing value");
    assert(Instance.Part < Iter->second.size() &&
           "need to overwrite existing value");
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    assert(CacheIdx < Iter->second[Instance.Part].size() &&
           "need to overwrite existing value");
    Iter->second[Instance.Part][CacheIdx] = V;
  }

  /// Add additional metadata to \p To that was not present on \p Orig.
  ///
  /// Currently this is used to add the noalias annotations based on the
  /// inserted memchecks.  Use this for instructions that are *cloned* into the
  /// vector loop.
  void addNewMetadata(Instruction *To, const Instruction *Orig);

  /// Add metadata from one instruction to another.
  ///
  /// This includes both the original MDs from \p From and additional ones (\see
  /// addNewMetadata).  Use this for *newly created* instructions in the vector
  /// loop.
  void addMetadata(Instruction *To, Instruction *From);

  /// Similar to the previous function but it adds the metadata to a
  /// vector of instructions.
  void addMetadata(ArrayRef<Value *> To, Instruction *From);

  /// Set the debug location in the builder using the debug location \p DL.
  void setDebugLocFrom(DebugLoc DL);

  /// Construct the vector value of a scalarized value \p V one lane at a time.
  void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance);

  /// Hold state information used when constructing the CFG of the output IR,
  /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
  struct CFGState {
    /// The previous VPBasicBlock visited. Initially set to null.
    VPBasicBlock *PrevVPBB = nullptr;

    /// The previous IR BasicBlock created or used. Initially set to the new
    /// header BasicBlock.
    BasicBlock *PrevBB = nullptr;

    /// The last IR BasicBlock in the output IR. Set to the exit block of the
    /// vector loop.
    BasicBlock *ExitBB = nullptr;

    /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
    /// of replication, maps the BasicBlock of the last replica created.
    SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;

    CFGState() = default;

    /// Returns the BasicBlock* mapped to the pre-header of the loop region
    /// containing \p R.
    BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
  } CFG;

  /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
  LoopInfo *LI;

  /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
  DominatorTree *DT;

  /// Hold a reference to the IRBuilder used to generate output IR code.
  IRBuilderBase &Builder;

  VPValue2ValueTy VPValue2Value;

  /// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
  Value *CanonicalIV = nullptr;

  /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
  InnerLoopVectorizer *ILV;

  /// Pointer to the VPlan for which code is generated.
  VPlan *Plan;

  /// The loop object for the current parent region, or nullptr.
  Loop *CurrentVectorLoop = nullptr;

  /// LoopVersioning.  It's only set up (non-null) if memchecks were
  /// used.
  ///
  /// This is currently only used to add no-alias metadata based on the
  /// memchecks.  The actual versioning is performed manually.
  LoopVersioning *LVer = nullptr;

  /// Map SCEVs to their expanded values. Populated when executing
  /// VPExpandSCEVRecipes.
  DenseMap<const SCEV *, Value *> ExpandedSCEVs;

  /// VPlan-based type analysis.
  VPTypeAnalysis TypeAnalysis;
};

/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
class VPBlockBase {
  friend class VPBlockUtils;

  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).

  /// An optional name for the block.
  std::string Name;

  /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
  /// it is a topmost VPBlockBase.
  VPRegionBlock *Parent = nullptr;

  /// List of predecessor blocks.
  SmallVector<VPBlockBase *, 1> Predecessors;

  /// List of successor blocks.
  SmallVector<VPBlockBase *, 1> Successors;

  /// VPlan containing the block. Can only be set on the entry block of the
  /// plan.
  VPlan *Plan = nullptr;

  /// Add \p Successor as the last successor to this block.
  void appendSuccessor(VPBlockBase *Successor) {
    assert(Successor && "Cannot add nullptr successor!");
    Successors.push_back(Successor);
  }

  /// Add \p Predecessor as the last predecessor to this block.
  void appendPredecessor(VPBlockBase *Predecessor) {
    assert(Predecessor && "Cannot add nullptr predecessor!");
    Predecessors.push_back(Predecessor);
  }

  /// Remove \p Predecessor from the predecessors of this block.
  void removePredecessor(VPBlockBase *Predecessor) {
    auto Pos = find(Predecessors, Predecessor);
    assert(Pos != Predecessors.end() && "Predecessor does not exist");
    Predecessors.erase(Pos);
  }

  /// Remove \p Successor from the successors of this block.
  void removeSuccessor(VPBlockBase *Successor) {
    auto Pos = find(Successors, Successor);
    assert(Pos != Successors.end() && "Successor does not exist");
    Successors.erase(Pos);
  }

protected:
  VPBlockBase(const unsigned char SC, const std::string &N)
      : SubclassID(SC), Name(N) {}

public:
  /// An enumeration for keeping track of the concrete subclasses of
  /// VPBlockBase that are actually instantiated. Values of this enumeration
  /// are kept in the SubclassID field of the VPBlockBase objects. They are
  /// used for concrete type identification.
  using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC };

  using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;

  virtual ~VPBlockBase() = default;

  const std::string &getName() const { return Name; }

  void setName(const Twine &newName) { Name = newName.str(); }

  /// \return an ID for the concrete type of this object.
  /// This is used to implement the classof checks. This should not be used
  /// for any other purpose, as the values may change as LLVM evolves.
  unsigned getVPBlockID() const { return SubclassID; }

  VPRegionBlock *getParent() { return Parent; }
  const VPRegionBlock *getParent() const { return Parent; }

  /// \return A pointer to the plan containing the current block.
  VPlan *getPlan();
  const VPlan *getPlan() const;

  /// Sets the pointer of the plan containing the block. The block must be the
  /// entry block into the VPlan.
  void setPlan(VPlan *ParentPlan);

  void setParent(VPRegionBlock *P) { Parent = P; }

  /// \return the VPBasicBlock that is the entry of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getEntryBasicBlock() const;
  VPBasicBlock *getEntryBasicBlock();

  /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getExitingBasicBlock() const;
  VPBasicBlock *getExitingBasicBlock();

  const VPBlocksTy &getSuccessors() const { return Successors; }
  VPBlocksTy &getSuccessors() { return Successors; }

  iterator_range<VPBlockBase **> successors() { return Successors; }

  const VPBlocksTy &getPredecessors() const { return Predecessors; }
  VPBlocksTy &getPredecessors() { return Predecessors; }

  /// \return the successor of this VPBlockBase if it has a single successor.
  /// Otherwise return a null pointer.
  VPBlockBase *getSingleSuccessor() const {
    return (Successors.size() == 1 ? *Successors.begin() : nullptr);
  }

  /// \return the predecessor of this VPBlockBase if it has a single
  /// predecessor. Otherwise return a null pointer.
  VPBlockBase *getSinglePredecessor() const {
    return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
  }

  size_t getNumSuccessors() const { return Successors.size(); }
  size_t getNumPredecessors() const { return Predecessors.size(); }

  /// An Enclosing Block of a block B is any block containing B, including B
  /// itself. \return the closest enclosing block starting from "this", which
  /// has successors. \return the root enclosing block if all enclosing blocks
  /// have no successors.
  VPBlockBase *getEnclosingBlockWithSuccessors();

  /// \return the closest enclosing block starting from "this", which has
  /// predecessors. \return the root enclosing block if all enclosing blocks
  /// have no predecessors.
  VPBlockBase *getEnclosingBlockWithPredecessors();

  /// \return the successors either attached directly to this VPBlockBase or, if
  /// this VPBlockBase is the exit block of a VPRegionBlock and has no
  /// successors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has successors and return them. If no such
  /// VPRegionBlock exists, return the (empty) successors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalSuccessors() {
    return getEnclosingBlockWithSuccessors()->getSuccessors();
  }

  /// \return the hierarchical successor of this VPBlockBase if it has a single
  /// hierarchical successor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalSuccessor() {
    return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
  }

  /// \return the predecessors either attached directly to this VPBlockBase or,
  /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
  /// predecessors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has predecessors and return them. If no such
  /// VPRegionBlock exists, return the (empty) predecessors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalPredecessors() {
    return getEnclosingBlockWithPredecessors()->getPredecessors();
  }

  /// \return the hierarchical predecessor of this VPBlockBase if it has a
  /// single hierarchical predecessor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalPredecessor() {
    return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
  }

  /// Set a given VPBlockBase \p Successor as the single successor of this
  /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
  /// This VPBlockBase must have no successors.
  void setOneSuccessor(VPBlockBase *Successor) {
    assert(Successors.empty() && "Setting one successor when others exist.");
    assert(Successor->getParent() == getParent() &&
           "connected blocks must have the same parent");
    appendSuccessor(Successor);
  }

  /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
  /// successors of this VPBlockBase. This VPBlockBase is not added as
  /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
  /// successors.
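  /// Because these helpers update only one side of the edge, hand-wiring a
  /// diamond must set the reverse edges itself (illustrative sketch; Cond,
  /// Then and Else are hypothetical blocks, and this is normally done via
  /// VPBlockUtils):
  /// \code
  ///   Cond->setTwoSuccessors(Then, Else); // does not touch predecessors
  ///   Then->setPredecessors({Cond});
  ///   Else->setPredecessors({Cond});
  /// \endcode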
  void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
    assert(Successors.empty() && "Setting two successors when others exist.");
    appendSuccessor(IfTrue);
    appendSuccessor(IfFalse);
  }

  /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
  /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
  /// as successor of any VPBasicBlock in \p NewPreds.
  void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
    assert(Predecessors.empty() && "Block predecessors already set.");
    for (auto *Pred : NewPreds)
      appendPredecessor(Pred);
  }

  /// Remove all the predecessors of this block.
  void clearPredecessors() { Predecessors.clear(); }

  /// Remove all the successors of this block.
  void clearSuccessors() { Successors.clear(); }

  /// The method which generates the output IR that corresponds to this
  /// VPBlockBase, thereby "executing" the VPlan.
  virtual void execute(VPTransformState *State) = 0;

  /// Delete all blocks reachable from a given VPBlockBase, inclusive.
  static void deleteCFG(VPBlockBase *Entry);

  /// Return true if it is legal to hoist instructions into this block.
  bool isLegalToHoistInto() {
    // There are currently no constraints that prevent an instruction from
    // being hoisted into a VPBlockBase.
    return true;
  }

  /// Replace all operands of VPUsers in the block with \p NewValue and also
  /// replaces all uses of VPValues defined in the block with NewValue.
  virtual void dropAllReferences(VPValue *NewValue) = 0;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printAsOperand(raw_ostream &OS, bool PrintType) const {
    OS << getName();
  }

  /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
  /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  virtual void print(raw_ostream &O, const Twine &Indent,
                     VPSlotTracker &SlotTracker) const = 0;

  /// Print plain-text dump of this VPlan to \p O.
  void print(raw_ostream &O) const {
    VPSlotTracker SlotTracker(getPlan());
    print(O, "", SlotTracker);
  }

  /// Print the successors of this block to \p O, prefixing all lines with \p
  /// Indent.
  void printSuccessors(raw_ostream &O, const Twine &Indent) const;

  /// Dump this VPBlockBase to dbgs().
  LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
#endif
};

/// A value that is used outside the VPlan. The operand of the user needs to be
/// added to the associated LCSSA phi node.
class VPLiveOut : public VPUser {
  PHINode *Phi;

public:
  VPLiveOut(PHINode *Phi, VPValue *Op)
      : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}

  static inline bool classof(const VPUser *U) {
    return U->getVPUserID() == VPUser::VPUserID::LiveOut;
  }

  /// Fix up the wrapped LCSSA phi node in the unique exit block.  This simply
  /// means we need to add the appropriate incoming value from the middle
  /// block as exiting edges from the scalar epilogue loop (if present) are
  /// already in place, and we exit the vector loop exclusively to the middle
  /// block.
  void fixPhi(VPlan &Plan, VPTransformState &State);

  /// Returns true if the VPLiveOut uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  PHINode *getPhi() const { return Phi; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPLiveOut to \p O.
  void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
#endif
};

/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
/// VPRecipeBase and VPValue separately.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
                     public VPDef,
                     public VPUser {
  friend VPBasicBlock;
  friend class VPBlockUtils;

  /// Each VPRecipe belongs to a single VPBasicBlock.
  VPBasicBlock *Parent = nullptr;

  /// The debug location for the recipe.
  DebugLoc DL;

public:
  VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
               DebugLoc DL = {})
      : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}

  template <typename IterT>
  VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands,
               DebugLoc DL = {})
      : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}
  virtual ~VPRecipeBase() = default;

  /// \return the VPBasicBlock which this VPRecipe belongs to.
  VPBasicBlock *getParent() { return Parent; }
  const VPBasicBlock *getParent() const { return Parent; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRecipe, thereby "executing" the VPlan.
  virtual void execute(VPTransformState &State) = 0;

  /// Insert an unlinked recipe into a basic block immediately before
  /// the specified recipe.
  void insertBefore(VPRecipeBase *InsertPos);
  /// Insert an unlinked recipe into \p BB immediately before the insertion
  /// point \p IP.
  void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);

  /// Insert an unlinked Recipe into a basic block immediately after
  /// the specified Recipe.
  void insertAfter(VPRecipeBase *InsertPos);

  /// Unlink this recipe from its current VPBasicBlock and insert it into
  /// the VPBasicBlock that MovePos lives in, right after MovePos.
  void moveAfter(VPRecipeBase *MovePos);

  /// Unlink this recipe and insert into BB before I.
  ///
  /// \pre I is a valid iterator into BB.
  void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);

  /// This method unlinks 'this' from the containing basic block, but does not
  /// delete it.
  void removeFromParent();

  /// This method unlinks 'this' from the containing basic block and deletes it.
  ///
  /// \returns an iterator pointing to the element after the erased one
  iplist<VPRecipeBase>::iterator eraseFromParent();

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    // All VPDefs are also VPRecipeBases.
    return true;
  }

  static inline bool classof(const VPUser *U) {
    return U->getVPUserID() == VPUser::VPUserID::Recipe;
  }

  /// Returns true if the recipe may have side-effects.
  bool mayHaveSideEffects() const;

  /// Returns true for PHI-like recipes.
  bool isPhi() const {
    return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
  }

  /// Returns true if the recipe may read from memory.
  bool mayReadFromMemory() const;

  /// Returns true if the recipe may write to memory.
  bool mayWriteToMemory() const;

  /// Returns true if the recipe may read from or write to memory.
  bool mayReadOrWriteMemory() const {
    return mayReadFromMemory() || mayWriteToMemory();
  }

  /// Returns the debug location of the recipe.
  DebugLoc getDebugLoc() const { return DL; }
};

// Helper macro to define common classof implementations for recipes.
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPDef *D) {                                 \
    return D->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    auto *R = V->getDefiningRecipe();                                          \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    auto *R = dyn_cast<VPRecipeBase>(U);                                       \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPDefID() == VPDefID;                                         \
  }

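// With the macro expanded in a recipe class, the usual LLVM RTTI idioms work
// across all of these bases (illustrative sketch; SomeRecipe is a
// hypothetical VPRecipeBase reference):
//
//   if (auto *Widen = dyn_cast<VPWidenRecipe>(&SomeRecipe))
//     (void)Widen->getOpcode();
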
/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
/// more output IR instructions that define a single result VPValue.
/// Note that VPRecipeBase must be inherited from before VPValue.
class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
public:
  template <typename IterT>
  VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this) {}

  VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
                    DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this) {}

  template <typename IterT>
  VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,
                    DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}

  static inline bool classof(const VPRecipeBase *R) {
    switch (R->getVPDefID()) {
    case VPRecipeBase::VPDerivedIVSC:
    case VPRecipeBase::VPExpandSCEVSC:
    case VPRecipeBase::VPInstructionSC:
    case VPRecipeBase::VPReductionSC:
    case VPRecipeBase::VPReplicateSC:
    case VPRecipeBase::VPScalarIVStepsSC:
    case VPRecipeBase::VPVectorPointerSC:
    case VPRecipeBase::VPWidenCallSC:
    case VPRecipeBase::VPWidenCanonicalIVSC:
    case VPRecipeBase::VPWidenCastSC:
    case VPRecipeBase::VPWidenGEPSC:
    case VPRecipeBase::VPWidenSC:
    case VPRecipeBase::VPWidenSelectSC:
    case VPRecipeBase::VPBlendSC:
    case VPRecipeBase::VPPredInstPHISC:
    case VPRecipeBase::VPCanonicalIVPHISC:
    case VPRecipeBase::VPActiveLaneMaskPHISC:
    case VPRecipeBase::VPFirstOrderRecurrencePHISC:
    case VPRecipeBase::VPWidenPHISC:
    case VPRecipeBase::VPWidenIntOrFpInductionSC:
    case VPRecipeBase::VPWidenPointerInductionSC:
    case VPRecipeBase::VPReductionPHISC:
      return true;
    case VPRecipeBase::VPInterleaveSC:
    case VPRecipeBase::VPBranchOnMaskSC:
    case VPRecipeBase::VPWidenMemoryInstructionSC:
      // TODO: Widened stores don't define a value, but widened loads do. Split
      // the recipes to be able to make widened loads VPSingleDefRecipes.
      return false;
    }
    llvm_unreachable("Unhandled VPDefID");
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  /// Returns the underlying instruction.
  Instruction *getUnderlyingInstr() {
    return cast<Instruction>(getUnderlyingValue());
  }
  const Instruction *getUnderlyingInstr() const {
    return cast<Instruction>(getUnderlyingValue());
  }
};

/// Class to record LLVM IR flags for a recipe along with it.
class VPRecipeWithIRFlags : public VPSingleDefRecipe {
  enum class OperationType : unsigned char {
    Cmp,
    OverflowingBinOp,
    DisjointOp,
    PossiblyExactOp,
    GEPOp,
    FPMathOp,
    NonNegOp,
    Other
  };

public:
  struct WrapFlagsTy {
    char HasNUW : 1;
    char HasNSW : 1;

    WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
  };

protected:
  struct GEPFlagsTy {
    char IsInBounds : 1;
    GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {}
  };

private:
  struct DisjointFlagsTy {
    char IsDisjoint : 1;
  };
  struct ExactFlagsTy {
    char IsExact : 1;
  };
  struct NonNegFlagsTy {
    char NonNeg : 1;
  };
  struct FastMathFlagsTy {
    char AllowReassoc : 1;
    char NoNaNs : 1;
    char NoInfs : 1;
    char NoSignedZeros : 1;
    char AllowReciprocal : 1;
    char AllowContract : 1;
    char ApproxFunc : 1;

    FastMathFlagsTy(const FastMathFlags &FMF);
  };

  OperationType OpType;

  union {
    CmpInst::Predicate CmpPredicate;
    WrapFlagsTy WrapFlags;
    DisjointFlagsTy DisjointFlags;
    ExactFlagsTy ExactFlags;
    GEPFlagsTy GEPFlags;
    NonNegFlagsTy NonNegFlags;
    FastMathFlagsTy FMFs;
    unsigned AllFlags;
  };

public:
  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL) {
    OpType = OperationType::Other;
    AllFlags = 0;
  }

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
      : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()) {
    if (auto *Op = dyn_cast<CmpInst>(&I)) {
      OpType = OperationType::Cmp;
      CmpPredicate = Op->getPredicate();
    } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
      OpType = OperationType::DisjointOp;
      DisjointFlags.IsDisjoint = Op->isDisjoint();
    } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
      OpType = OperationType::OverflowingBinOp;
      WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
    } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
      OpType = OperationType::PossiblyExactOp;
      ExactFlags.IsExact = Op->isExact();
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
      OpType = OperationType::GEPOp;
      GEPFlags.IsInBounds = GEP->isInBounds();
    } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
      OpType = OperationType::NonNegOp;
      NonNegFlags.NonNeg = PNNI->hasNonNeg();
    } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
      OpType = OperationType::FPMathOp;
      FMFs = Op->getFastMathFlags();
    } else {
      OpType = OperationType::Other;
      AllFlags = 0;
    }
  }

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      CmpInst::Predicate Pred, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
        CmpPredicate(Pred) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      WrapFlagsTy WrapFlags, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL),
        OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      FastMathFlags FMFs, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
        FMFs(FMFs) {}

protected:
  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      GEPFlagsTy GEPFlags, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
        GEPFlags(GEPFlags) {}

public:
  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
           R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
           R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
  }

  /// Drop all poison-generating flags.
  void dropPoisonGeneratingFlags() {
    // NOTE: This needs to be kept in-sync with
    // Instruction::dropPoisonGeneratingFlags.
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      WrapFlags.HasNUW = false;
      WrapFlags.HasNSW = false;
      break;
    case OperationType::DisjointOp:
      DisjointFlags.IsDisjoint = false;
      break;
    case OperationType::PossiblyExactOp:
      ExactFlags.IsExact = false;
      break;
    case OperationType::GEPOp:
      GEPFlags.IsInBounds = false;
      break;
    case OperationType::FPMathOp:
      FMFs.NoNaNs = false;
      FMFs.NoInfs = false;
      break;
    case OperationType::NonNegOp:
      NonNegFlags.NonNeg = false;
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }

  /// Set the IR flags for \p I.
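  /// Typical codegen use (illustrative sketch; A and B are assumed operand
  /// values): after emitting the IR instruction for this recipe, transfer the
  /// recorded flags onto it:
  /// \code
  ///   Value *V = State.Builder.CreateAdd(A, B);
  ///   if (auto *I = dyn_cast<Instruction>(V))
  ///     setFlags(I);
  /// \endcode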
  void setFlags(Instruction *I) const {
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
      I->setHasNoSignedWrap(WrapFlags.HasNSW);
      break;
    case OperationType::DisjointOp:
      cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
      break;
    case OperationType::PossiblyExactOp:
      I->setIsExact(ExactFlags.IsExact);
      break;
    case OperationType::GEPOp:
      cast<GetElementPtrInst>(I)->setIsInBounds(GEPFlags.IsInBounds);
      break;
    case OperationType::FPMathOp:
      I->setHasAllowReassoc(FMFs.AllowReassoc);
      I->setHasNoNaNs(FMFs.NoNaNs);
      I->setHasNoInfs(FMFs.NoInfs);
      I->setHasNoSignedZeros(FMFs.NoSignedZeros);
      I->setHasAllowReciprocal(FMFs.AllowReciprocal);
      I->setHasAllowContract(FMFs.AllowContract);
      I->setHasApproxFunc(FMFs.ApproxFunc);
      break;
    case OperationType::NonNegOp:
      I->setNonNeg(NonNegFlags.NonNeg);
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }

  CmpInst::Predicate getPredicate() const {
    assert(OpType == OperationType::Cmp &&
           "recipe doesn't have a compare predicate");
    return CmpPredicate;
  }

  bool isInBounds() const {
    assert(OpType == OperationType::GEPOp &&
           "recipe doesn't have inbounds flag");
    return GEPFlags.IsInBounds;
  }

  /// Returns true if the recipe has fast-math flags.
  bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }

  FastMathFlags getFastMathFlags() const;

  bool hasNoUnsignedWrap() const {
    assert(OpType == OperationType::OverflowingBinOp &&
           "recipe doesn't have a NUW flag");
    return WrapFlags.HasNUW;
  }

  bool hasNoSignedWrap() const {
    assert(OpType == OperationType::OverflowingBinOp &&
           "recipe doesn't have a NSW flag");
    return WrapFlags.HasNSW;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printFlags(raw_ostream &O) const;
#endif
};

/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While, as with any Recipe, it may generate a sequence of IR instructions
/// when executed, these instructions would always form a single-def expression
/// as the VPInstruction is also a single def-use vertex.
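/// For example (an illustrative sketch; CanonicalIV, TripCount and DL are
/// assumed to exist in the surrounding code), a plan-level back-branch can be
/// modeled as:
/// \code
///   auto *BOC = new VPInstruction(VPInstruction::BranchOnCount,
///                                 {CanonicalIV, TripCount}, DL);
/// \endcode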
class VPInstruction : public VPRecipeWithIRFlags {
  friend class VPlanSlp;

public:
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
  enum {
    FirstOrderRecurrenceSplice =
        Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
                                      // values of a first-order recurrence.
    Not,
    SLPLoad,
    SLPStore,
    ActiveLaneMask,
    CalculateTripCountMinusVF,
    // Increment the canonical IV separately for each unrolled part.
    CanonicalIVIncrementForPart,
    BranchOnCount,
    BranchOnCond,
    ComputeReductionResult,
  };

private:
  typedef unsigned char OpcodeTy;
  OpcodeTy Opcode;

  /// An optional name that can be used for the generated IR instruction.
  const std::string Name;

  /// Utility method serving execute(): generates a single instance of the
  /// modeled instruction. \returns the generated value for \p Part.
  /// In some cases an existing value is returned rather than a generated
  /// one.
  Value *generateInstruction(VPTransformState &State, unsigned Part);

#if !defined(NDEBUG)
  /// Return true if the VPInstruction is a floating point math operation, i.e.
  /// has fast-math flags.
  bool isFPMathOp() const;
#endif

protected:
  void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }

public:
  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL,
                const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
        Opcode(Opcode), Name(Name.str()) {}

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                DebugLoc DL = {}, const Twine &Name = "")
      : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name) {}

  VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
                VPValue *B, DebugLoc DL = {}, const Twine &Name = "");

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
        Opcode(Opcode), Name(Name.str()) {}

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");

  VP_CLASSOF_IMPL(VPDef::VPInstructionSC)

  unsigned getOpcode() const { return Opcode; }

  /// Generate the instruction.
  /// TODO: We currently execute only per-part unless a specific instance is
  /// provided.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPInstruction to \p O.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// Print the VPInstruction to dbgs() (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  /// Return true if this instruction may modify memory.
  bool mayWriteToMemory() const {
    // TODO: we can use attributes of the called function to rule out memory
    //       modifications.
    return Opcode == Instruction::Store || Opcode == Instruction::Call ||
           Opcode == Instruction::Invoke || Opcode == SLPStore;
  }

  bool hasResult() const {
    // CallInst may or may not have a result, depending on the called function.
    // Conservatively assume that calls have results for now.
    switch (getOpcode()) {
    case Instruction::Ret:
    case Instruction::Br:
    case Instruction::Store:
    case Instruction::Switch:
    case Instruction::IndirectBr:
    case Instruction::Resume:
    case Instruction::CatchRet:
    case Instruction::Unreachable:
    case Instruction::Fence:
    case Instruction::AtomicRMW:
    case VPInstruction::BranchOnCond:
    case VPInstruction::BranchOnCount:
      return false;
    default:
      return true;
    }
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    if (getOperand(0) != Op)
      return false;
    switch (getOpcode()) {
    default:
      return false;
    case VPInstruction::ActiveLaneMask:
    case VPInstruction::CalculateTripCountMinusVF:
    case VPInstruction::CanonicalIVIncrementForPart:
    case VPInstruction::BranchOnCount:
      return true;
    };
    llvm_unreachable("switch should return");
  }

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool onlyFirstPartUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    if (getOperand(0) != Op)
      return false;
    switch (getOpcode()) {
    default:
      return false;
    case VPInstruction::BranchOnCount:
      return true;
    };
    llvm_unreachable("switch should return");
  }
};

/// VPWidenRecipe is a recipe for producing a vectorized copy of its
/// ingredient. This recipe covers most of the traditional vectorization cases
/// where each ingredient transforms into a vectorized version of itself.
class VPWidenRecipe : public VPRecipeWithIRFlags {
  unsigned Opcode;

public:
  template <typename IterT>
  VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
      : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
        Opcode(I.getOpcode()) {}

  ~VPWidenRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenSC)

  /// Produce widened copies of all Ingredients.
  void execute(VPTransformState &State) override;

  unsigned getOpcode() const { return Opcode; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// VPWidenCastRecipe is a recipe to create vector cast instructions.
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
  /// Cast instruction opcode.
  Instruction::CastOps Opcode;

  /// Result type for the cast.
  Type *ResultTy;

public:
  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
                    CastInst &UI)
      : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
        ResultTy(ResultTy) {
    assert(UI.getOpcode() == Opcode &&
           "opcode of underlying cast doesn't match");
    assert(UI.getType() == ResultTy &&
           "result type of underlying cast doesn't match");
  }

  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
      : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
        ResultTy(ResultTy) {}

  ~VPWidenCastRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)

  /// Produce widened copies of the cast.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  Instruction::CastOps getOpcode() const { return Opcode; }

  /// Returns the result type of the cast.
  Type *getResultType() const { return ResultTy; }
};

/// A recipe for widening Call instructions.
class VPWidenCallRecipe : public VPSingleDefRecipe {
  /// ID of the vector intrinsic to call when widening the call. If set to
  /// Intrinsic::not_intrinsic, a library call will be used instead.
  Intrinsic::ID VectorIntrinsicID;
  /// If this recipe represents a library call, Variant stores a pointer to
  /// the chosen function. There is a 1:1 mapping between a given VF and the
  /// chosen vectorized variant, so there will be a different vplan for each
  /// VF with a valid variant.
  Function *Variant;

public:
  template <typename IterT>
  VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments,
                    Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},
                    Function *Variant = nullptr)
      : VPSingleDefRecipe(VPDef::VPWidenCallSC, CallArguments, &I, DL),
        VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {}

  ~VPWidenCallRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)

  /// Produce a widened version of the call instruction.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

1374 /// A recipe for widening select instructions.
1375 struct VPWidenSelectRecipe : public VPSingleDefRecipe {
1376   template <typename IterT>
VPWidenSelectRecipeVPWidenSelectRecipe1377   VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands)
1378       : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
1379                           I.getDebugLoc()) {}
1380 
1381   ~VPWidenSelectRecipe() override = default;
1382 
1383   VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1384 
1385   /// Produce a widened version of the select instruction.
1386   void execute(VPTransformState &State) override;
1387 
1388 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1389   /// Print the recipe.
1390   void print(raw_ostream &O, const Twine &Indent,
1391              VPSlotTracker &SlotTracker) const override;
1392 #endif
1393 
getCondVPWidenSelectRecipe1394   VPValue *getCond() const {
1395     return getOperand(0);
1396   }
1397 
isInvariantCondVPWidenSelectRecipe1398   bool isInvariantCond() const {
1399     return getCond()->isDefinedOutsideVectorRegions();
1400   }
1401 };
1402 
1403 /// A recipe for handling GEP instructions.
1404 class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
isPointerLoopInvariant()1405   bool isPointerLoopInvariant() const {
1406     return getOperand(0)->isDefinedOutsideVectorRegions();
1407   }
1408 
isIndexLoopInvariant(unsigned I)1409   bool isIndexLoopInvariant(unsigned I) const {
1410     return getOperand(I + 1)->isDefinedOutsideVectorRegions();
1411   }
1412 
areAllOperandsInvariant()1413   bool areAllOperandsInvariant() const {
1414     return all_of(operands(), [](VPValue *Op) {
1415       return Op->isDefinedOutsideVectorRegions();
1416     });
1417   }
1418 
1419 public:
1420   template <typename IterT>
VPWidenGEPRecipe(GetElementPtrInst * GEP,iterator_range<IterT> Operands)1421   VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
1422       : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}
1423 
1424   ~VPWidenGEPRecipe() override = default;
1425 
1426   VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1427 
1428   /// Generate the gep nodes.
1429   void execute(VPTransformState &State) override;
1430 
1431 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1432   /// Print the recipe.
1433   void print(raw_ostream &O, const Twine &Indent,
1434              VPSlotTracker &SlotTracker) const override;
1435 #endif
1436 };
1437 
1438 /// A recipe to compute the pointers for widened memory accesses of IndexTy for
1439 /// all parts. If IsReverse is true, compute pointers for accessing the input in
1440 /// reverse order per part.
1441 class VPVectorPointerRecipe : public VPRecipeWithIRFlags {
1442   Type *IndexedTy;
1443   bool IsReverse;
1444 
1445 public:
VPVectorPointerRecipe(VPValue * Ptr,Type * IndexedTy,bool IsReverse,bool IsInBounds,DebugLoc DL)1446   VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
1447                         bool IsInBounds, DebugLoc DL)
1448       : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1449                             GEPFlagsTy(IsInBounds), DL),
1450         IndexedTy(IndexedTy), IsReverse(IsReverse) {}
1451 
1452   VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1453 
1454   void execute(VPTransformState &State) override;
1455 
onlyFirstLaneUsed(const VPValue * Op)1456   bool onlyFirstLaneUsed(const VPValue *Op) const override {
1457     assert(is_contained(operands(), Op) &&
1458            "Op must be an operand of the recipe");
1459     return true;
1460   }
1461 
1462 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1463   /// Print the recipe.
1464   void print(raw_ostream &O, const Twine &Indent,
1465              VPSlotTracker &SlotTracker) const override;
1466 #endif
1467 };
1468 
1469 /// A pure virtual base class for all recipes modeling header phis, including
1470 /// phis for first order recurrences, pointer inductions and reductions. The
1471 /// start value is the first operand of the recipe and the incoming value from
1472 /// the backedge is the second operand.
1473 ///
1474 /// Inductions are modeled using the following sub-classes:
1475 ///  * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
1476 ///    starting at a specified value (zero for the main vector loop, the resume
1477 ///    value for the epilogue vector loop) and stepping by 1. The induction
1478 ///    controls exiting of the vector loop by comparing against the vector trip
1479 ///    count. Produces a single scalar PHI for the induction value per
1480 ///    iteration.
1481 ///  * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
1482 ///    floating point inductions with arbitrary start and step values. Produces
1483 ///    a vector PHI per-part.
1484 ///  * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
1485 ///    value of an IV with different start and step values. Produces a single
1486 ///    scalar value per iteration
1487 ///  * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
1488 ///    canonical or derived induction.
1489 ///  * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
1490 ///    pointer induction. Produces either a vector PHI per-part or scalar values
1491 ///    per-lane based on the canonical induction.
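///
/// For a concrete header phi, the operand layout described above can typically
/// be queried as in this sketch (illustrative only; some sub-classes override
/// the backedge accessors):
/// \code
///   VPValue *Start = PhiR->getStartValue();   // operand 0, if present
///   VPValue *Back = PhiR->getBackedgeValue(); // operand 1
/// \endcode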
class VPHeaderPHIRecipe : public VPSingleDefRecipe {
protected:
  VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
                    VPValue *Start = nullptr, DebugLoc DL = {})
      : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {
    if (Start)
      addOperand(Start);
  }

public:
  ~VPHeaderPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *B) {
    return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
           B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
  }
  static inline bool classof(const VPValue *V) {
    auto *B = V->getDefiningRecipe();
    return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
           B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
  }

  /// Generate the phi nodes.
  void execute(VPTransformState &State) override = 0;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override = 0;
#endif

  /// Returns the start value of the phi, if one is set.
  VPValue *getStartValue() {
    return getNumOperands() == 0 ? nullptr : getOperand(0);
  }
  VPValue *getStartValue() const {
    return getNumOperands() == 0 ? nullptr : getOperand(0);
  }

  /// Update the start value of the recipe.
  void setStartValue(VPValue *V) { setOperand(0, V); }

  /// Returns the incoming value from the loop backedge.
  virtual VPValue *getBackedgeValue() {
    return getOperand(1);
  }

  /// Returns the backedge value as a recipe. The backedge value is guaranteed
  /// to be a recipe.
  virtual VPRecipeBase &getBackedgeRecipe() {
    return *getBackedgeValue()->getDefiningRecipe();
  }
};

/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector values.
class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
  PHINode *IV;
  TruncInst *Trunc;
  const InductionDescriptor &IndDesc;

public:
  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
                                const InductionDescriptor &IndDesc)
      : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
        Trunc(nullptr), IndDesc(IndDesc) {
    addOperand(Step);
  }

  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
                                const InductionDescriptor &IndDesc,
                                TruncInst *Trunc)
      : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
        IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
    addOperand(Step);
  }

  ~VPWidenIntOrFpInductionRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)

  /// Generate the vectorized and scalarized versions of the phi node as
  /// needed by their users.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  VPValue *getBackedgeValue() override {
    // TODO: All operands of base recipe must exist and be at same index in
    // derived recipe.
    llvm_unreachable(
        "VPWidenIntOrFpInductionRecipe generates its own backedge value");
  }

  VPRecipeBase &getBackedgeRecipe() override {
    // TODO: All operands of base recipe must exist and be at same index in
    // derived recipe.
    llvm_unreachable(
        "VPWidenIntOrFpInductionRecipe generates its own backedge value");
  }

  /// Returns the step value of the induction.
  VPValue *getStepValue() { return getOperand(1); }
  const VPValue *getStepValue() const { return getOperand(1); }

  /// Returns the first defined value as TruncInst, if it is one, or nullptr
  /// otherwise.
  TruncInst *getTruncInst() { return Trunc; }
  const TruncInst *getTruncInst() const { return Trunc; }

  PHINode *getPHINode() { return IV; }

  /// Returns the induction descriptor for the recipe.
  const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

  /// Returns true if the induction is canonical, i.e. starting at 0 and
  /// incremented by UF * VF (= the original IV is incremented by 1).
  bool isCanonical() const;

  /// Returns the scalar type of the induction.
  Type *getScalarType() const {
    return Trunc ? Trunc->getType() : IV->getType();
  }
};

class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
  const InductionDescriptor &IndDesc;

  bool IsScalarAfterVectorization;

public:
  /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value
  /// \p Start.
  VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
                                const InductionDescriptor &IndDesc,
                                bool IsScalarAfterVectorization)
      : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
        IndDesc(IndDesc),
        IsScalarAfterVectorization(IsScalarAfterVectorization) {
    addOperand(Start);
    addOperand(Step);
  }

  ~VPWidenPointerInductionRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)

  /// Generate vector values for the pointer induction.
  void execute(VPTransformState &State) override;

  /// Returns true if only scalar values will be generated.
  bool onlyScalarsGenerated(ElementCount VF);

  /// Returns the induction descriptor for the recipe.
  const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for handling header phis that are widened in the vector loop.
/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
/// managed in the recipe directly.
class VPWidenPHIRecipe : public VPHeaderPHIRecipe {
  /// List of incoming blocks. Only used in the VPlan native path.
  SmallVector<VPBasicBlock *, 2> IncomingBlocks;

public:
  /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
  VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
      : VPHeaderPHIRecipe(VPDef::VPWidenPHISC, Phi) {
    if (Start)
      addOperand(Start);
  }

  ~VPWidenPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
  void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
    addOperand(IncomingV);
    IncomingBlocks.push_back(IncomingBlock);
  }

  /// Returns the \p I th incoming VPBasicBlock.
  VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }

  /// Returns the \p I th incoming VPValue.
  VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
};

/// A recipe for handling first-order recurrence phis. The start value is the
/// first operand of the recipe and the incoming value from the backedge is the
/// second operand.
struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
  VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
      : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}

  VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)

  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
  }

  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for handling reduction phis. The start value is the first operand
/// of the recipe and the incoming value from the backedge is the second
/// operand.
class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
  /// Descriptor for the reduction.
  const RecurrenceDescriptor &RdxDesc;

  /// The phi is part of an in-loop reduction.
  bool IsInLoop;

  /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
  bool IsOrdered;

public:
  /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by
  /// \p RdxDesc.
  VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
                       VPValue &Start, bool IsInLoop = false,
                       bool IsOrdered = false)
      : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
        RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
    assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
  }

  ~VPReductionPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return R->getVPDefID() == VPDef::VPReductionPHISC;
  }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const RecurrenceDescriptor &getRecurrenceDescriptor() const {
    return RdxDesc;
  }

  /// Returns true, if the phi is part of an ordered reduction.
  bool isOrdered() const { return IsOrdered; }

  /// Returns true, if the phi is part of an in-loop reduction.
  bool isInLoop() const { return IsInLoop; }
};

/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
class VPBlendRecipe : public VPSingleDefRecipe {
public:
  /// The blend operation is a User of the incoming values and of their
  /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
  /// might be incoming with a full mask for which there is no VPValue.
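  ///
  /// For example (illustrative), with operands [I0, M0, I1, M1]:
  /// \code
  ///   Blend->getNumIncomingValues(); // 2
  ///   Blend->getIncomingValue(1);    // I1, i.e. getOperand(2)
  ///   Blend->getMask(1);             // M1, i.e. getOperand(3)
  /// \endcode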
  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
      : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
    assert(Operands.size() > 0 &&
           ((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
           "Expected either a single incoming value or a positive even number "
           "of operands");
  }

  VP_CLASSOF_IMPL(VPDef::VPBlendSC)

  /// Return the number of incoming values, taking into account that a single
  /// incoming value has no mask.
  unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }

  /// Return incoming value number \p Idx.
  VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); }

  /// Return mask number \p Idx.
  VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Recursing through Blend recipes only, must terminate at header phis at
    // the latest.
    return all_of(users(),
                  [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
  }
};

/// VPInterleaveRecipe is a recipe for transforming an interleave group of
/// loads or stores into one wide load/store and shuffles. The first operand of
/// a VPInterleave recipe is the address, followed by the stored values,
/// followed by an optional mask.
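///
/// For example (an illustrative sketch), a masked store group with two members
/// has operands {Addr, V0, V1, Mask}:
/// \code
///   IR->getAddr();             // Addr, the mandatory first operand
///   IR->getStoredValues();     // {V0, V1}
///   IR->getMask();             // Mask, the trailing operand
///   IR->getNumStoreOperands(); // 2
/// \endcode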
class VPInterleaveRecipe : public VPRecipeBase {
  const InterleaveGroup<Instruction> *IG;

  /// Indicates if the interleave group is in a conditional block and requires
  /// a mask.
  bool HasMask = false;

  /// Indicates if gaps between members of the group need to be masked out or
  /// if unused gaps can be loaded speculatively.
  bool NeedsMaskForGaps = false;

public:
  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
                     ArrayRef<VPValue *> StoredValues, VPValue *Mask,
                     bool NeedsMaskForGaps)
      : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
        NeedsMaskForGaps(NeedsMaskForGaps) {
    for (unsigned i = 0; i < IG->getFactor(); ++i)
      if (Instruction *I = IG->getMember(i)) {
        if (I->getType()->isVoidTy())
          continue;
        new VPValue(I, this);
      }

    for (auto *SV : StoredValues)
      addOperand(SV);
    if (Mask) {
      HasMask = true;
      addOperand(Mask);
    }
  }
  ~VPInterleaveRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last, currently 2nd operand.
    return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Return the VPValues stored by this interleave group. If it is a load
  /// interleave group, return an empty ArrayRef.
  ArrayRef<VPValue *> getStoredValues() const {
    // The first operand is the address, followed by the stored values,
    // followed by an optional mask.
    return ArrayRef<VPValue *>(op_begin(), getNumOperands())
        .slice(1, getNumStoreOperands());
  }

  /// Generate the wide load or store, and shuffles.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }

  /// Returns the number of stored operands of this interleave group. Returns 0
  /// for load interleave groups.
  unsigned getNumStoreOperands() const {
    return getNumOperands() - (HasMask ? 2 : 1);
  }

  /// The recipe only uses the first lane of the address.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
  }
};

/// A recipe to represent inloop reduction operations, performing a reduction
/// on a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
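///
/// A usage sketch (RdxDesc, AddI, Chain and Vec are hypothetical values, for
/// illustration only):
/// \code
///   // In-loop add reduction: Chain accumulates the scalar result of
///   // reducing Vec; a non-null CondOp would predicate the reduction.
///   auto *Rdx = new VPReductionRecipe(RdxDesc, AddI, Chain, Vec,
///                                     /*CondOp=*/nullptr);
///   Rdx->getChainOp(); // Chain
///   Rdx->getCondOp();  // nullptr, no condition operand
/// \endcode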
class VPReductionRecipe : public VPSingleDefRecipe {
  /// The recurrence descriptor for the reduction in question.
  const RecurrenceDescriptor &RdxDesc;

public:
  VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
                    VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp)
      : VPSingleDefRecipe(VPDef::VPReductionSC,
                          ArrayRef<VPValue *>({ChainOp, VecOp}), I),
        RdxDesc(R) {
    if (CondOp)
      addOperand(CondOp);
  }

  ~VPReductionRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPReductionSC)

  /// Generate the reduction in the loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// The VPValue of the scalar Chain being accumulated.
  VPValue *getChainOp() const { return getOperand(0); }
  /// The VPValue of the vector value to be reduced.
  VPValue *getVecOp() const { return getOperand(1); }
  /// The VPValue of the condition for the block.
  VPValue *getCondOp() const {
    return getNumOperands() > 2 ? getOperand(2) : nullptr;
  }
};

/// VPReplicateRecipe replicates a given instruction producing multiple scalar
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to
/// be uniform, only one copy, for lane zero, will be generated.
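///
/// For example (illustrative), with VF = 4 and UF = 1 a non-uniform replicate
/// emits four scalar clones, one per lane, while a uniform one emits a single
/// clone for lane zero only.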
class VPReplicateRecipe : public VPRecipeWithIRFlags {
  /// Indicator if only a single replica per lane is needed.
  bool IsUniform;

  /// Indicator if the replicas are also predicated.
  bool IsPredicated;

public:
  template <typename IterT>
  VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
                    bool IsUniform, VPValue *Mask = nullptr)
      : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
        IsUniform(IsUniform), IsPredicated(Mask) {
    if (Mask)
      addOperand(Mask);
  }

  ~VPReplicateRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPReplicateSC)

  /// Generate replicas of the desired Ingredient. Replicas will be generated
  /// for all parts and lanes unless a specific part and lane are specified in
  /// the \p State.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  bool isUniform() const { return IsUniform; }

  bool isPredicated() const { return IsPredicated; }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return isUniform();
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe is used by a widened recipe via an intervening
  /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
  /// in a vector.
  bool shouldPack() const;

  /// Return the mask of a predicated VPReplicateRecipe.
  VPValue *getMask() {
    assert(isPredicated() &&
           "Trying to get the mask of an unpredicated recipe");
    return getOperand(getNumOperands() - 1);
  }
};

/// A recipe for generating conditional branches on the bits of a mask.
class VPBranchOnMaskRecipe : public VPRecipeBase {
public:
  VPBranchOnMaskRecipe(VPValue *BlockInMask)
      : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
    if (BlockInMask) // nullptr means all-one mask.
      addOperand(BlockInMask);
  }

  VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)

  /// Generate the extraction of the appropriate bit from the block mask and
  /// the conditional branch.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override {
    O << Indent << "BRANCH-ON-MASK ";
    if (VPValue *Mask = getMask())
      Mask->printAsOperand(O, SlotTracker);
    else
      O << " All-One";
  }
#endif

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
    // Mask is optional.
    return getNumOperands() == 1 ? getOperand(0) : nullptr;
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
class VPPredInstPHIRecipe : public VPSingleDefRecipe {
public:
  /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
  /// nodes after merging back from a Branch-on-Mask.
  VPPredInstPHIRecipe(VPValue *PredV)
      : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {}
  ~VPPredInstPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)

  /// Generates phi nodes for live-outs as needed to retain SSA form.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// A Recipe for widening load/store operations.
/// The recipe uses the following VPValues:
/// - For load: Address, optional mask
/// - For store: Address, stored value, optional mask
/// TODO: We currently execute only per-part unless a specific instance is
/// provided.
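///
/// For example (illustrative), a masked consecutive store has operands
/// {Addr, StoredValue, Mask}:
/// \code
///   MemR->isStore();        // true
///   MemR->getAddr();        // operand 0
///   MemR->getStoredValue(); // operand 1
///   MemR->getMask();        // operand 2; nullptr when unmasked
/// \endcode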
class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
  Instruction &Ingredient;

  // Whether the loaded-from / stored-to addresses are consecutive.
  bool Consecutive;

  // Whether the consecutive loaded/stored addresses are in reverse order.
  bool Reverse;

  void setMask(VPValue *Mask) {
    if (!Mask)
      return;
    addOperand(Mask);
  }

  bool isMasked() const {
    return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
  }

public:
  VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
                                 bool Consecutive, bool Reverse)
      : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}),
        Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) {
    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
    new VPValue(this, &Load);
    setMask(Mask);
  }

  VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
                                 VPValue *StoredValue, VPValue *Mask,
                                 bool Consecutive, bool Reverse)
      : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue}),
        Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
    setMask(Mask);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC)

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last operand.
    return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Returns true if this recipe is a store.
  bool isStore() const { return isa<StoreInst>(Ingredient); }

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const {
    assert(isStore() && "Stored value only available for store instructions");
    return getOperand(1); // Stored value is the 2nd, mandatory operand.
  }

  // Return whether the loaded-from / stored-to addresses are consecutive.
  bool isConsecutive() const { return Consecutive; }

  // Return whether the consecutive loaded/stored addresses are in reverse
  // order.
  bool isReverse() const { return Reverse; }

  /// Generate the wide load/store.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");

    // Widened, consecutive memory operations only demand the first lane of
    // their address, unless the same operand is also stored. That latter can
    // happen with opaque pointers.
    return Op == getAddr() && isConsecutive() &&
           (!isStore() || Op != getStoredValue());
  }

  Instruction &getIngredient() const { return Ingredient; }
};

/// Recipe to expand a SCEV expression.
class VPExpandSCEVRecipe : public VPSingleDefRecipe {
  const SCEV *Expr;
  ScalarEvolution &SE;

public:
  VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
      : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}

  ~VPExpandSCEVRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)

  /// Expand the SCEV expression into a runtime value.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const SCEV *getSCEV() const { return Expr; }
};

/// Canonical scalar induction phi of the vector loop, starting at the
/// specified start value (either 0 or the resume value when vectorizing the
/// epilogue loop). VPWidenCanonicalIVRecipe represents the vector version of
/// the canonical induction variable.
class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
      : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}

  ~VPCanonicalIVPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *D) {
    return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
  }

  /// Generate the canonical scalar induction phi of the vector loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the scalar type of the induction.
  Type *getScalarType() const {
    return getStartValue()->getLiveInIRValue()->getType();
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool onlyFirstPartUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Check if the induction described by \p Kind, \p Start and \p Step is
  /// canonical, i.e. has the same start, step (of 1), and type as the
  /// canonical IV.
  bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start,
                   VPValue *Step, Type *Ty) const;
};

/// A recipe for generating the active lane mask for the vector loop that is
/// used to predicate the vector operations.
/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
/// remove VPActiveLaneMaskPHIRecipe.
class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
      : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
                          DL) {}

  ~VPActiveLaneMaskPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *D) {
    return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
  }

  /// Generate the active lane mask phi of the vector loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A Recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe {
public:
  VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
      : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}

  ~VPWidenCanonicalIVRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)

  /// Generate a canonical vector induction variable of the vector loop, with
  /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
  /// step = <VF*UF, VF*UF, ..., VF*UF>.
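  ///
  /// For example (illustrative), with VF = 4 and UF = 2: part 0 starts at
  /// <0, 1, 2, 3>, part 1 at <4, 5, 6, 7>, and both step by <8, 8, 8, 8> each
  /// vector iteration.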
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the scalar type of the induction.
  const Type *getScalarType() const {
    return cast<VPCanonicalIVPHIRecipe>(getOperand(0)->getDefiningRecipe())
        ->getScalarType();
  }
};

/// A recipe for converting the canonical IV value to the corresponding value
/// of an IV with different start and step values, using Start + CanonicalIV *
/// Step.
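///
/// For example (illustrative), for an IV with start 16 and step -2, the
/// canonical IV value 3 is transformed to 16 + 3 * -2 = 10.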
class VPDerivedIVRecipe : public VPSingleDefRecipe {
  /// If not nullptr, the result of the induction will get truncated to
  /// TruncResultTy.
  Type *TruncResultTy;

  /// Kind of the induction.
  const InductionDescriptor::InductionKind Kind;
  /// If not nullptr, the floating point induction binary operator. Must be set
  /// for floating point inductions.
  const FPMathOperator *FPBinOp;

public:
  VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
                    VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
                    Type *TruncResultTy)
      : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
        TruncResultTy(TruncResultTy), Kind(IndDesc.getKind()),
        FPBinOp(dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())) {
  }

  ~VPDerivedIVRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)

  /// Generate the transformed value of the induction at offset StartValue (1st
  /// operand) + IV (2nd operand) * StepValue (3rd operand).
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  Type *getScalarType() const {
    return TruncResultTy ? TruncResultTy
                         : getStartValue()->getLiveInIRValue()->getType();
  }

  VPValue *getStartValue() const { return getOperand(0); }
  VPValue *getCanonicalIV() const { return getOperand(1); }
  VPValue *getStepValue() const { return getOperand(2); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their scalar values.
class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
  Instruction::BinaryOps InductionOpcode;

public:
  VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step,
                        Instruction::BinaryOps Opcode, FastMathFlags FMFs)
      : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
                            ArrayRef<VPValue *>({IV, Step}), FMFs),
        InductionOpcode(Opcode) {}

  VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,
                        VPValue *Step)
      : VPScalarIVStepsRecipe(
            IV, Step, IndDesc.getInductionOpcode(),
            dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
                ? IndDesc.getInductionBinOp()->getFastMathFlags()
                : FastMathFlags()) {}

  ~VPScalarIVStepsRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)

  /// Generate the scalarized versions of the phi node as needed by their
  /// users.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  VPValue *getStepValue() const { return getOperand(1); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
/// output IR instructions. All PHI-like recipes must come before any non-PHI
/// recipes.
class VPBasicBlock : public VPBlockBase {
public:
  using RecipeListTy = iplist<VPRecipeBase>;

private:
  /// The VPRecipes held in the order of output instructions to generate.
  RecipeListTy Recipes;

public:
  VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
      : VPBlockBase(VPBasicBlockSC, Name.str()) {
    if (Recipe)
      appendRecipe(Recipe);
  }

  ~VPBasicBlock() override {
    while (!Recipes.empty())
      Recipes.pop_back();
  }

  /// Instruction iterators...
  using iterator = RecipeListTy::iterator;
  using const_iterator = RecipeListTy::const_iterator;
  using reverse_iterator = RecipeListTy::reverse_iterator;
  using const_reverse_iterator = RecipeListTy::const_reverse_iterator;

  //===--------------------------------------------------------------------===//
  /// Recipe iterator methods
  ///
  inline iterator begin() { return Recipes.begin(); }
  inline const_iterator begin() const { return Recipes.begin(); }
  inline iterator end() { return Recipes.end(); }
  inline const_iterator end() const { return Recipes.end(); }

  inline reverse_iterator rbegin() { return Recipes.rbegin(); }
  inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
  inline reverse_iterator rend() { return Recipes.rend(); }
  inline const_reverse_iterator rend() const { return Recipes.rend(); }

  inline size_t size() const { return Recipes.size(); }
  inline bool empty() const { return Recipes.empty(); }
  inline const VPRecipeBase &front() const { return Recipes.front(); }
  inline VPRecipeBase &front() { return Recipes.front(); }
  inline const VPRecipeBase &back() const { return Recipes.back(); }
  inline VPRecipeBase &back() { return Recipes.back(); }

  /// Returns a reference to the list of recipes.
  RecipeListTy &getRecipeList() { return Recipes; }

  /// Returns a pointer to a member of the recipe list.
  static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
    return &VPBasicBlock::Recipes;
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
  }

  void insert(VPRecipeBase *Recipe, iterator InsertPt) {
    assert(Recipe && "No recipe to append.");
    assert(!Recipe->Parent && "Recipe already in VPlan");
    Recipe->Parent = this;
    Recipes.insert(InsertPt, Recipe);
  }

  /// Augment the existing recipes of a VPBasicBlock with an additional
  /// \p Recipe as the last recipe.
  void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }

  /// The method which generates the output IR instructions that correspond to
  /// this VPBasicBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  /// Return the position of the first non-phi node recipe in the block.
  iterator getFirstNonPhi();

  /// Returns an iterator range over the PHI-like recipes in the block.
  iterator_range<iterator> phis() {
    return make_range(begin(), getFirstNonPhi());
  }

  void dropAllReferences(VPValue *NewValue) override;

  /// Split current block at \p SplitAt by inserting a new block between the
  /// current block and its successors and moving all recipes starting at
  /// SplitAt to the new block. Returns the new block.
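  ///
  /// A usage sketch (illustrative; R names some recipe in this block):
  /// \code
  ///   // Everything from R to the end of VPBB moves into a new successor.
  ///   VPBasicBlock *Tail = VPBB->splitAt(R->getIterator());
  /// \endcode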
2506   VPBasicBlock *splitAt(iterator SplitAt);
2507 
2508   VPRegionBlock *getEnclosingLoopRegion();
2509 
2510 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2511   /// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p
2512   /// SlotTracker is used to print unnamed VPValue's using consequtive numbers.
2513   ///
2514   /// Note that the numbering is applied to the whole VPlan, so printing
2515   /// individual blocks is consistent with the whole VPlan printing.
2516   void print(raw_ostream &O, const Twine &Indent,
2517              VPSlotTracker &SlotTracker) const override;
2518   using VPBlockBase::print; // Get the print(raw_stream &O) version.
2519 #endif
2520 
2521   /// If the block has multiple successors, return the branch recipe terminating
2522   /// the block. If there are no or only a single successor, return nullptr;
2523   VPRecipeBase *getTerminator();
2524   const VPRecipeBase *getTerminator() const;
2525 
2526   /// Returns true if the block is exiting it's parent region.
2527   bool isExiting() const;
2528 
2529 private:
2530   /// Create an IR BasicBlock to hold the output instructions generated by this
2531   /// VPBasicBlock, and return it. Update the CFGState accordingly.
2532   BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
2533 };
2534 
2535 /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
2536 /// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
2537 /// A VPRegionBlock may indicate that its contents are to be replicated several
2538 /// times. This is designed to support predicated scalarization, in which a
2539 /// scalar if-then code structure needs to be generated VF * UF times. Having
2540 /// this replication indicator helps to keep a single model for multiple
2541 /// candidate VF's. The actual replication takes place only once the desired VF
2542 /// and UF have been determined.
class VPRegionBlock : public VPBlockBase {
  /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
  VPBlockBase *Entry;

  /// Hold the Single Exiting block of the SESE region modelled by the
  /// VPRegionBlock.
  VPBlockBase *Exiting;

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool IsReplicator;

public:
  VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
                const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
        IsReplicator(IsReplicator) {
    assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
    assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
    Entry->setParent(this);
    Exiting->setParent(this);
  }
  VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
        IsReplicator(IsReplicator) {}

  ~VPRegionBlock() override {
    if (Entry) {
      VPValue DummyValue;
      Entry->dropAllReferences(&DummyValue);
      deleteCFG(Entry);
    }
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
  }

  const VPBlockBase *getEntry() const { return Entry; }
  VPBlockBase *getEntry() { return Entry; }

  /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
  /// EntryBlock must have no predecessors.
  void setEntry(VPBlockBase *EntryBlock) {
    assert(EntryBlock->getPredecessors().empty() &&
           "Entry block cannot have predecessors.");
    Entry = EntryBlock;
    EntryBlock->setParent(this);
  }

  const VPBlockBase *getExiting() const { return Exiting; }
  VPBlockBase *getExiting() { return Exiting; }

  /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
  /// ExitingBlock must have no successors.
  void setExiting(VPBlockBase *ExitingBlock) {
    assert(ExitingBlock->getSuccessors().empty() &&
           "Exit block cannot have successors.");
    Exiting = ExitingBlock;
    ExitingBlock->setParent(this);
  }
  /// Returns the pre-header VPBasicBlock of the loop region.
  VPBasicBlock *getPreheaderVPBB() {
    assert(!isReplicator() && "should only get pre-header of loop regions");
    return getSinglePredecessor()->getExitingBasicBlock();
  }

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool isReplicator() const { return IsReplicator; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRegionBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  void dropAllReferences(VPValue *NewValue) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
  /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual regions is consistent with the whole VPlan printing.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
  using VPBlockBase::print; // Get the print(raw_ostream &O) version.
#endif
};

/// VPlan models a candidate for vectorization, encoding various decisions
/// taken to produce efficient output IR, including which branches,
/// basic-blocks and output IR instructions to generate, and their cost. VPlan
/// holds a Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an
/// Entry VPBasicBlock.
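///
/// A sketch of obtaining and configuring a plan; the real driver is
/// LoopVectorizationPlanner, and \c TripCountSCEV and \c SE are assumed to be
/// available in the caller:
/// \code
///   VPlanPtr Plan = VPlan::createInitialVPlan(TripCountSCEV, SE);
///   Plan->addVF(ElementCount::getFixed(4));
///   Plan->setName("InitialVPlan");
/// \endcode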
class VPlan {
  friend class VPlanPrinter;
  friend class VPSlotTracker;

  /// Hold the single entry to the Hierarchical CFG of the VPlan, i.e. the
  /// preheader of the vector loop.
  VPBasicBlock *Entry;

  /// VPBasicBlock corresponding to the original preheader. Used to place
  /// VPExpandSCEV recipes for expressions used during skeleton creation and the
  /// rest of VPlan execution.
  VPBasicBlock *Preheader;

  /// Holds the VFs applicable to this VPlan.
  SmallSetVector<ElementCount, 2> VFs;

  /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
  /// any UF.
  SmallSetVector<unsigned, 2> UFs;

  /// Holds the name of the VPlan, for printing.
  std::string Name;

  /// Represents the trip count of the original loop, for folding
  /// the tail.
  VPValue *TripCount = nullptr;

  /// Represents the backedge taken count of the original loop, for folding
  /// the tail. It equals TripCount - 1.
  VPValue *BackedgeTakenCount = nullptr;

  /// Represents the vector trip count.
  VPValue VectorTripCount;

  /// Represents the loop-invariant VF * UF of the vector loop region.
  VPValue VFxUF;

  /// Holds a mapping between Values and their corresponding VPValue inside
  /// VPlan.
  Value2VPValueTy Value2VPValue;

  /// Contains all the external definitions created for this VPlan. External
  /// definitions are VPValues that hold a pointer to their underlying IR.
  SmallVector<VPValue *, 16> VPLiveInsToFree;

  /// Indicates whether it is safe to use the Value2VPValue mapping or if the
  /// mapping cannot be used any longer, because it is stale.
  bool Value2VPValueEnabled = true;

  /// Values used outside the plan.
  MapVector<PHINode *, VPLiveOut *> LiveOuts;

  /// Mapping from SCEVs to the VPValues representing their expansions.
  /// NOTE: This mapping is temporary and will be removed once all users have
  /// been modeled in VPlan directly.
  DenseMap<const SCEV *, VPValue *> SCEVToExpansion;

public:
  /// Construct a VPlan with original preheader \p Preheader, trip count \p TC
  /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
  /// be disconnected, as the bypass blocks between them are not yet modeled in
  /// VPlan.
  VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
      : VPlan(Preheader, Entry) {
    TripCount = TC;
  }

  /// Construct a VPlan with original preheader \p Preheader and \p Entry to
  /// the plan. At the moment, \p Preheader and \p Entry need to be
  /// disconnected, as the bypass blocks between them are not yet modeled in
  /// VPlan.
  VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
      : Entry(Entry), Preheader(Preheader) {
    Entry->setPlan(this);
    Preheader->setPlan(this);
    assert(Preheader->getNumSuccessors() == 0 &&
           Preheader->getNumPredecessors() == 0 &&
           "preheader must be disconnected");
  }

  ~VPlan();

  /// Create initial VPlan skeleton, having an "entry" VPBasicBlock (wrapping
  /// original scalar pre-header) which contains SCEV expansions that need to
  /// happen before the CFG is modified; a VPBasicBlock for the vector
  /// pre-header, followed by a region for the vector loop, followed by the
  /// middle VPBasicBlock.
  static VPlanPtr createInitialVPlan(const SCEV *TripCount,
                                     ScalarEvolution &PSE);

  /// Prepare the plan for execution, setting up the required live-in values.
  void prepareToExecute(Value *TripCount, Value *VectorTripCount,
                        Value *CanonicalIVStartValue, VPTransformState &State);

  /// Generate the IR code for this VPlan.
  void execute(VPTransformState *State);

  VPBasicBlock *getEntry() { return Entry; }
  const VPBasicBlock *getEntry() const { return Entry; }

  /// The trip count of the original loop.
  VPValue *getTripCount() const {
    assert(TripCount && "trip count needs to be set before accessing it");
    return TripCount;
  }

  /// The backedge taken count of the original loop.
  VPValue *getOrCreateBackedgeTakenCount() {
    if (!BackedgeTakenCount)
      BackedgeTakenCount = new VPValue();
    return BackedgeTakenCount;
  }

  /// The vector trip count.
  VPValue &getVectorTripCount() { return VectorTripCount; }

  /// Returns VF * UF of the vector loop region.
  VPValue &getVFxUF() { return VFxUF; }

  /// Mark the plan to indicate that using Value2VPValue is not safe any
  /// longer, because it may be stale.
  void disableValue2VPValue() { Value2VPValueEnabled = false; }

  void addVF(ElementCount VF) { VFs.insert(VF); }

  void setVF(ElementCount VF) {
    assert(hasVF(VF) && "Cannot set VF not already in plan");
    VFs.clear();
    VFs.insert(VF);
  }

  bool hasVF(ElementCount VF) { return VFs.count(VF); }

  bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }

  bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }

  void setUF(unsigned UF) {
    assert(hasUF(UF) && "Cannot set the UF not already in plan");
    UFs.clear();
    UFs.insert(UF);
  }

  /// Return a string with the name of the plan and the applicable VFs and UFs.
  std::string getName() const;

  void setName(const Twine &newName) { Name = newName.str(); }

  void addVPValue(Value *V, VPValue *VPV) {
    assert((Value2VPValueEnabled || VPV->isLiveIn()) &&
           "Value2VPValue mapping may be out of date!");
    assert(V && "Trying to add a null Value to VPlan");
    assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
    Value2VPValue[V] = VPV;
  }

  /// Returns the VPValue for \p V. \p OverrideAllowed can be used to disable
  /// checking whether it is safe to query VPValues using IR Values.
  VPValue *getVPValue(Value *V, bool OverrideAllowed = false) {
    assert(V && "Trying to get the VPValue of a null Value");
    assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
    assert((Value2VPValueEnabled || OverrideAllowed ||
            Value2VPValue[V]->isLiveIn()) &&
           "Value2VPValue mapping may be out of date!");
    return Value2VPValue[V];
  }

  /// Gets the VPValue for \p V or adds a new live-in (if none exists yet) for
  /// \p V.
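  ///
  /// For example (a sketch; \c Plan and the IR value \c Inc are assumed to
  /// exist in the caller):
  /// \code
  ///   VPValue *IncVPV = Plan.getVPValueOrAddLiveIn(Inc);
  /// \endcode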
  VPValue *getVPValueOrAddLiveIn(Value *V) {
    assert(V && "Trying to get or add the VPValue of a null Value");
    if (!Value2VPValue.count(V)) {
      VPValue *VPV = new VPValue(V);
      VPLiveInsToFree.push_back(VPV);
      addVPValue(V, VPV);
    }

    return getVPValue(V);
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the live-ins of this VPlan to \p O.
  void printLiveIns(raw_ostream &O) const;

  /// Print this VPlan to \p O.
  void print(raw_ostream &O) const;

  /// Print this VPlan in DOT format to \p O.
  void printDOT(raw_ostream &O) const;

  /// Dump the plan to stderr (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  /// Returns a range mapping the values in the range \p Operands to their
  /// corresponding VPValues.
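  ///
  /// For example (a sketch; \c I is an IR instruction whose operands are all
  /// assumed to be mappable):
  /// \code
  ///   auto Ops = to_vector(Plan.mapToVPValues(I->operands()));
  /// \endcode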
  iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>>
  mapToVPValues(User::op_range Operands) {
    std::function<VPValue *(Value *)> Fn = [this](Value *Op) {
      return getVPValueOrAddLiveIn(Op);
    };
    return map_range(Operands, Fn);
  }

  /// Returns the VPRegionBlock of the vector loop.
  VPRegionBlock *getVectorLoopRegion() {
    return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
  }
  const VPRegionBlock *getVectorLoopRegion() const {
    return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
  }

  /// Returns the canonical induction recipe of the vector loop.
  VPCanonicalIVPHIRecipe *getCanonicalIV() {
    VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();
    if (EntryVPBB->empty()) {
      // VPlan native path.
      EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
    }
    return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
  }

  void addLiveOut(PHINode *PN, VPValue *V);

  void removeLiveOut(PHINode *PN) {
    delete LiveOuts[PN];
    LiveOuts.erase(PN);
  }

  const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
    return LiveOuts;
  }

  VPValue *getSCEVExpansion(const SCEV *S) const {
    return SCEVToExpansion.lookup(S);
  }

  void addSCEVExpansion(const SCEV *S, VPValue *V) {
    assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
    SCEVToExpansion[S] = V;
  }

  /// \return The block corresponding to the original preheader.
  VPBasicBlock *getPreheader() { return Preheader; }
  const VPBasicBlock *getPreheader() const { return Preheader; }

private:
  /// Add to the given dominator tree the header block and every new basic block
  /// that was created between it and the latch block, inclusive.
  static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB,
                                  BasicBlock *LoopPreHeaderBB,
                                  BasicBlock *LoopExitBB);
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
/// indented and follows the dot format.
class VPlanPrinter {
  raw_ostream &OS;
  const VPlan &Plan;
  unsigned Depth = 0;
  unsigned TabWidth = 2;
  std::string Indent;
  unsigned BID = 0;
  SmallDenseMap<const VPBlockBase *, unsigned> BlockID;

  VPSlotTracker SlotTracker;

  /// Handle indentation.
  void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }

  /// Print a given \p Block of the Plan.
  void dumpBlock(const VPBlockBase *Block);

  /// Print the information related to the CFG edges going out of a given
  /// \p Block, followed by printing the successor blocks themselves.
  void dumpEdges(const VPBlockBase *Block);

  /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
  /// its successor blocks.
  void dumpBasicBlock(const VPBasicBlock *BasicBlock);

  /// Print a given \p Region of the Plan.
  void dumpRegion(const VPRegionBlock *Region);

  unsigned getOrCreateBID(const VPBlockBase *Block) {
    return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
  }

  Twine getOrCreateName(const VPBlockBase *Block);

  Twine getUID(const VPBlockBase *Block);

  /// Print the information related to a CFG edge between two VPBlockBases.
  void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
                const Twine &Label);

public:
  VPlanPrinter(raw_ostream &O, const VPlan &P)
      : OS(O), Plan(P), SlotTracker(&P) {}

  LLVM_DUMP_METHOD void dump();
};

struct VPlanIngredient {
  const Value *V;

  VPlanIngredient(const Value *V) : V(V) {}

  void print(raw_ostream &O) const;
};

inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
  I.print(OS);
  return OS;
}

inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
  Plan.print(OS);
  return OS;
}
#endif

//===----------------------------------------------------------------------===//
// VPlan Utilities
//===----------------------------------------------------------------------===//

/// Class that provides utilities for VPBlockBases in VPlan.
class VPBlockUtils {
public:
  VPBlockUtils() = delete;

  /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
  /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
  /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
  /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
  /// have neither successors nor predecessors.
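  ///
  /// A sketch (block names assumed): if VPBB1 -> VPBB2 beforehand, then after
  /// \code
  ///   VPBlockUtils::insertBlockAfter(NewVPBB, VPBB1);
  /// \endcode
  /// the CFG is VPBB1 -> NewVPBB -> VPBB2.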
  static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
    assert(NewBlock->getSuccessors().empty() &&
           NewBlock->getPredecessors().empty() &&
           "Can't insert new block with predecessors or successors.");
    NewBlock->setParent(BlockPtr->getParent());
    SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
    for (VPBlockBase *Succ : Succs) {
      disconnectBlocks(BlockPtr, Succ);
      connectBlocks(NewBlock, Succ);
    }
    connectBlocks(BlockPtr, NewBlock);
  }

  /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
  /// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and \p
  /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
  /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
  /// and \p IfTrue and \p IfFalse must have neither successors nor
  /// predecessors.
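  ///
  /// For instance (a sketch; block names are assumed), giving a terminating
  /// \c CondVPBB a then/else pair of successors:
  /// \code
  ///   VPBlockUtils::insertTwoBlocksAfter(ThenVPBB, ElseVPBB, CondVPBB);
  /// \endcode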
  static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
                                   VPBlockBase *BlockPtr) {
    assert(IfTrue->getSuccessors().empty() &&
           "Can't insert IfTrue with successors.");
    assert(IfFalse->getSuccessors().empty() &&
           "Can't insert IfFalse with successors.");
    BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
    IfTrue->setPredecessors({BlockPtr});
    IfFalse->setPredecessors({BlockPtr});
    IfTrue->setParent(BlockPtr->getParent());
    IfFalse->setParent(BlockPtr->getParent());
  }

  /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
  /// the successors of \p From and \p From to the predecessors of \p To. Both
  /// VPBlockBases must have the same parent, which can be null. Both
  /// VPBlockBases can be already connected to other VPBlockBases.
  static void connectBlocks(VPBlockBase *From, VPBlockBase *To) {
    assert((From->getParent() == To->getParent()) &&
           "Can't connect two blocks with different parents");
    assert(From->getNumSuccessors() < 2 &&
           "Blocks can't have more than two successors.");
    From->appendSuccessor(To);
    To->appendPredecessor(From);
  }

  /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
  /// from the successors of \p From and \p From from the predecessors of \p To.
  static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {
    assert(To && "Successor to disconnect is null.");
    From->removeSuccessor(To);
    To->removePredecessor(From);
  }

  /// Return an iterator range over \p Range which only includes \p BlockTy
  /// blocks. The accesses are cast to \p BlockTy.
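  ///
  /// For example, visiting only the VPBasicBlocks among a block's successors
  /// (a sketch; \c Block is assumed):
  /// \code
  ///   for (VPBasicBlock *VPBB :
  ///        VPBlockUtils::blocksOnly<VPBasicBlock>(Block->getSuccessors()))
  ///     dbgs() << VPBB->getName() << "\n";
  /// \endcode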
  template <typename BlockTy, typename T>
  static auto blocksOnly(const T &Range) {
    // Create BaseTy with correct const-ness based on BlockTy.
    using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
                                      const VPBlockBase, VPBlockBase>;

    // We need to first create an iterator range over (const) BlockTy & instead
    // of (const) BlockTy * for filter_range to work properly.
    auto Mapped =
        map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
    auto Filter = make_filter_range(
        Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
    return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
      return cast<BlockTy>(&Block);
    });
  }
};

class VPInterleavedAccessInfo {
  DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
      InterleaveGroupMap;

  /// Type for mapping of instruction based interleave groups to VPInstruction
  /// interleave groups.
  using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
                             InterleaveGroup<VPInstruction> *>;

  /// Recursively traverse \p Region and populate VPlan based interleave groups
  /// based on \p IAI.
  void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
                   InterleavedAccessInfo &IAI);
  /// Recursively traverse \p Block and populate VPlan based interleave groups
  /// based on \p IAI.
  void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
                  InterleavedAccessInfo &IAI);

public:
  VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);

  ~VPInterleavedAccessInfo() {
    SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
    // Avoid releasing a pointer twice.
    for (auto &I : InterleaveGroupMap)
      DelSet.insert(I.second);
    for (auto *Ptr : DelSet)
      delete Ptr;
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if \p Instr does not belong to such a group.
  InterleaveGroup<VPInstruction> *
  getInterleaveGroup(VPInstruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }
};

/// Class that maps (parts of) an existing VPlan to trees of combined
/// VPInstructions.
class VPlanSlp {
  enum class OpMode { Failed, Load, Opcode };

  /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
  /// DenseMap keys.
  struct BundleDenseMapInfo {
    static SmallVector<VPValue *, 4> getEmptyKey() {
      return {reinterpret_cast<VPValue *>(-1)};
    }

    static SmallVector<VPValue *, 4> getTombstoneKey() {
      return {reinterpret_cast<VPValue *>(-2)};
    }

    static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
      return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
    }

    static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
                        const SmallVector<VPValue *, 4> &RHS) {
      return LHS == RHS;
    }
  };

  /// Mapping of values in the original VPlan to a combined VPInstruction.
  DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
      BundleToCombined;

  VPInterleavedAccessInfo &IAI;

  /// Basic block to operate on. For now, only instructions in a single BB are
  /// considered.
  const VPBasicBlock &BB;

  /// Indicates whether we managed to combine all visited instructions or not.
  bool CompletelySLP = true;

  /// Width of the widest combined bundle in bits.
  unsigned WidestBundleBits = 0;

  using MultiNodeOpTy =
      typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;

  // Input operand bundles for the current multi node. Each multi node operand
  // bundle contains values not matching the multi node's opcode. They will
  // be reordered in reorderMultiNodeOps, once we have completed building a
  // multi node.
  SmallVector<MultiNodeOpTy, 4> MultiNodeOps;

  /// Indicates whether we are currently building a multi node.
  bool MultiNodeActive = false;

  /// Check if we can vectorize Operands together.
  bool areVectorizable(ArrayRef<VPValue *> Operands) const;

  /// Add combined instruction \p New for the bundle \p Operands.
  void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);

  /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
  VPInstruction *markFailed();

  /// Reorder operands in the multi node to maximize sequential memory access
  /// and commutative operations.
  SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();

  /// Choose the best candidate to use for the lane after \p Last. The set of
  /// candidates to choose from are values with an opcode matching \p Last's
  /// or loads consecutive to \p Last.
  std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
                                       SmallPtrSetImpl<VPValue *> &Candidates,
                                       VPInterleavedAccessInfo &IAI);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print bundle \p Values to dbgs().
  void dumpBundle(ArrayRef<VPValue *> Values);
#endif

public:
  VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}

  ~VPlanSlp() = default;

  /// Tries to build an SLP tree rooted at \p Operands and returns a
  /// VPInstruction combining \p Operands, if they can be combined.
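  ///
  /// A sketch of driving the combiner (\c VPIAI, \c VPBB, \c StoreA, and
  /// \c StoreB are assumed to exist in the caller):
  /// \code
  ///   VPlanSlp Slp(VPIAI, *VPBB);
  ///   VPInstruction *Root = Slp.buildGraph({StoreA, StoreB});
  ///   if (Root && Slp.isCompletelySLP())
  ///     dbgs() << "widest bundle: " << Slp.getWidestBundleBits() << "\n";
  /// \endcode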
  VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);

  /// Return the width of the widest combined bundle in bits.
  unsigned getWidestBundleBits() const { return WidestBundleBits; }

  /// Return true if all visited instructions can be combined.
  bool isCompletelySLP() const { return CompletelySLP; }
};

namespace vputils {

/// Returns true if only the first lane of \p Def is used.
bool onlyFirstLaneUsed(VPValue *Def);

/// Returns true if only the first part of \p Def is used.
bool onlyFirstPartUsed(VPValue *Def);

/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's
/// pre-header already contains a recipe expanding \p Expr, return it. If not,
/// create a new one.
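///
/// For instance (a sketch; \c Plan, \c SE, and \c PtrSCEV are assumed to exist
/// in the caller):
/// \code
///   VPValue *Expanded = getOrCreateVPValueForSCEVExpr(Plan, PtrSCEV, SE);
/// \endcode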
VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
                                       ScalarEvolution &SE);

/// Returns true if \p VPV is uniform after vectorization.
inline bool isUniformAfterVectorization(VPValue *VPV) {
  // A value defined outside the vector region must be uniform after
  // vectorization inside a vector region.
  if (VPV->isDefinedOutsideVectorRegions())
    return true;
  VPRecipeBase *Def = VPV->getDefiningRecipe();
  assert(Def && "Must have definition for value defined inside vector region");
  if (auto *Rep = dyn_cast<VPReplicateRecipe>(Def))
    return Rep->isUniform();
  if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def))
    return all_of(GEP->operands(), isUniformAfterVectorization);
  if (auto *VPI = dyn_cast<VPInstruction>(Def))
    return VPI->getOpcode() == VPInstruction::ComputeReductionResult;
  return false;
}
} // end namespace vputils

} // end namespace llvm

#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H