//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
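// As an illustrative sketch (the exact pseudo spellings are simplified), a
// vector add with SEW=32 and LMUL=1 needs a preceding
//   vsetvli x0, a0, e32, m1, ta, ma
// unless the VL/VTYPE state reaching it already satisfies the fields the
// instruction demands.
//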
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));

namespace {

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static uint16_t getRVVMCOpcode(uint16_t RVVPseudoOpcode) {
  const RISCVVPseudosTable::PseudoInfo *RVV =
      RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
  if (!RVV)
    return 0;
  return RVV->BaseInstr;
}

static bool isScalarMoveInstr(const MachineInstr &MI) {
  switch (getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}

/// Get the EEW for a load or store instruction.  Return std::nullopt if MI is
/// not a load or store whose EEW is encoded in the opcode (and which therefore
/// ignores SEW).
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

/// Return true if this is an operation on mask registers.  Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification.  Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
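/// For example, under a tail agnostic policy a tail lane may legally hold
/// either its old value or all-ones, while an undefined merge operand (e.g.
/// an IMPLICIT_DEF passthrough) places no constraint at all on the inactive
/// lanes.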
static bool hasUndefinedMergeOp(const MachineInstr &MI,
                                const MachineRegisterInfo &MRI) {

  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the pass through
    // lanes are undefined.
    return true;

  // If the tied operand is an IMPLICIT_DEF (or a REG_SEQUENCE whose operands
  // are solely IMPLICIT_DEFS), the pass through lanes are undefined.
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
    if (UseMI->isImplicitDef())
      return true;

    if (UseMI->isRegSequence()) {
      for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
        MachineInstr *SourceMI = MRI.getVRegDef(UseMI->getOperand(i).getReg());
        if (!SourceMI || !SourceMI->isImplicitDef())
          return false;
      }
      return true;
    }
  }
  return false;
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used.  If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
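  // (For example, vmv.s.x writes element 0 whenever VL > 0, so it cares only
  // about the zero vs non-zero distinction, not the exact VL value.)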
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 2,              // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWNone = 0                // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    }
    OS << ", ";
    OS << "LMUL=" << LMUL << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
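/// For example, if Used demands only SEWLMULRatio, then e32/m2 and e16/m1
/// (both with ratio 16) are compatible even though SEW and LMUL individually
/// differ.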
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  if (Used.SEW == DemandedFields::SEWEqual &&
      RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
    return false;

  if (Used.SEW == DemandedFields::SEWGreaterThanOrEqual &&
      RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
    return false;

  if (Used.LMUL &&
      RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI,
                           const MachineRegisterInfo *MRI) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms.  The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode.  This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarMoveInstr(MI)) {
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype, and can disregard policy bits.  Warning: It's tempting to try
    // doing this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1.  We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedMergeOp(MI, *MRI)) {
      Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }

  bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (getAVLReg() == RISCV::X0)
        return true;
      if (MachineInstr *MI = MRI.getVRegDef(getAVLReg());
          MI && MI->getOpcode() == RISCV::ADDI &&
          MI->getOperand(1).isReg() && MI->getOperand(2).isImm() &&
          MI->getOperand(1).getReg() == RISCV::X0 &&
          MI->getOperand(2).getImm() != 0)
        return true;
      return false;
    }
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const MachineRegisterInfo &MRI) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(MRI) && Other.hasNonZeroAVL(MRI));
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
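  // For example, with VLEN=128 both (SEW=32, LMUL=1) and (SEW=64, LMUL=2)
  // give VLMAX=4, since VLMAX = VLEN*LMUL/SEW and both have a SEW/LMUL
  // ratio of 32.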
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements (Require, with
  // the demanded fields in Used) are compatible with the previous vsetvli
  // instruction represented by this.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const MachineRegisterInfo &MRI) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    // If the instruction doesn't need an AVLReg and the SEW matches, consider
    // it compatible.
    if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
      if (SEW == Require.SEW)
        return true;

    if (Used.VLAny && !hasSameAVL(Require))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, MRI))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)AVLReg;
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI);
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI);
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doLocalPostpass(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedMergeOp(MI, *MRI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to X0 register.
      if (Imm == RISCV::VLMaxSentinel)
        InstrInfo.setAVLReg(RISCV::X0);
      else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else {
    InstrInfo.setAVLReg(RISCV::NoRegister);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI.
    // If it has the same VLMAX we want and the last VL/VTYPE we observed is
    // the same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg() &&
        Info.getAVLReg().isVirtual()) {
      if (MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg())) {
        if (isVectorConfigInstr(*DefMI)) {
          VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
          if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
            BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(RISCV::X0, RegState::Kill)
                .addImm(Info.encodeVTYPE())
                .addReg(RISCV::VL, RegState::Implicit);
            return;
          }
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 0 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(0)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));

  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  DemandedFields Used = getDemanded(MI, MRI);

  // A slidedown/slideup with an *undefined* merge op can freely clobber
  // elements not copied from the source vector (e.g. masked off, tail, or
  // slideup's prefix). Notes:
  // * We can't modify SEW here since the slide amount is in units of SEW.
  // * VL=1 is special only because we have existing support for zero vs
  //   non-zero VL.  We could generalize this if we had a VL > C predicate.
  // * The LMUL1 restriction is for machines whose latency may depend on VL.
  // * As above, this is only legal for tail "undefined" not "agnostic".
  if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
      isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
    Used.VLAny = false;
    Used.VLZeroness = true;
    Used.LMUL = false;
    Used.TailPolicy = false;
  }

  // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated
  // semantically the same as vmv.s.x.  This is particularly useful since we
  // don't have an immediate form of vmv.s.x, and thus frequently use vmv.v.i
  // in its place.  Since a splat is non-constant time in LMUL, we do need to
  // be careful to not increase the number of active vector registers (unlike
  // for vmv.s.x).
  if (isScalarSplatInstr(MI) && Require.hasAVLImm() &&
      Require.getAVLImm() == 1 && isLMUL1OrSmaller(CurInfo.getVLMUL()) &&
      hasUndefinedMergeOp(MI, *MRI)) {
    Used.LMUL = false;
    Used.SEWLMULRatio = false;
    Used.VLAny = false;
    Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
    Used.TailPolicy = false;
  }

  if (CurInfo.isCompatible(Used, Require, *MRI))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(Used, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}

// Given an incoming state reaching MI, modifies that state so that it is
// minimally compatible with MI.  The resulting state is guaranteed to be
// semantically legal for MI, but may not be the state requested by MI.
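// For example, a vmv.s.x may request AVL=1, but if the incoming AVL is
// provably non-zero with the same VLMAX, the incoming AVL is kept (see the
// scalar move handling below) and no state change is required.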
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  Info = NewInfo;

  if (!RISCVII::hasVLOp(TSFlags))
    return;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
  // VL > 0. We can discard the user requested AVL and just use the last
  // one if we can prove it equally zero.  This removes a vsetvli entirely
  // if the types match, or allows use of the cheaper AVL-preserving variant
  // if VLMAX doesn't change.  If VLMAX might change, we couldn't use
  // the 'vsetvli x0, x0, vtype' variant, so we avoid the transform to
  // prevent extending the live range of an AVL register operand.
  // TODO: We can probably relax this for immediates.
  if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
      PrevInfo.hasEquallyZeroAVL(Info, *MRI) &&
      Info.hasSameVLMAX(PrevInfo)) {
    if (PrevInfo.hasAVLImm())
      Info.setAVLImm(PrevInfo.getAVLImm());
    else
      Info.setAVLReg(PrevInfo.getAVLReg());
    return;
  }

  // If AVL is defined by a vsetvli with the same VLMAX, we can
  // replace the AVL operand with the AVL of the defining vsetvli.
  // We avoid general register AVLs to avoid extending live ranges
  // without being sure we can kill the original source reg entirely.
  if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
    return;
  MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;

  VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
  if (DefInfo.hasSameVLMAX(Info) &&
      (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
    if (DefInfo.hasAVLImm())
      Info.setAVLImm(DefInfo.getAVLImm());
    else
      Info.setAVLReg(DefInfo.getAVLReg());
    return;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
      MI.modifiesRegister(RISCV::VTYPE))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
  bool HadVectorOp = false;

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
  BBInfo.Change = BBInfo.Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(BBInfo.Change, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(BBInfo.Change, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based
  // on the input state.  To cache, we'd have to add logic for finding
  // never-compatible state changes.
  computeVLVTYPEChanges(MBB);
  VSETVLIInfo TmpStatus = BBInfo.Change;

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
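// A typical case is a loop whose AVL is a PHI merging the VL produced by a
// vsetvli in each predecessor: if every such vsetvli already establishes the
// required VTYPE and matches its block's exit state, the vsetvli at the loop
// head is redundant.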
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct.  Note that
        // we *do* need to model the state as if it changed: while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE.  Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}

/// Return true if the VL value configured must be equal to the requested one.
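/// For example, AVL=4 with SEW=32 gives AVLInBits=128; at LMUL=1, any
/// implementation with getRealMinVLen() >= 128 must select VL == AVL, so the
/// configured VL is fixed.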
static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
  if (!Info.hasAVLImm())
    // VLMAX is always the same value.
    // TODO: Could extend to other registers by looking at the associated vreg
    // def placement.
    return RISCV::X0 == Info.getAVLReg();

  unsigned AVL = Info.getAVLImm();
  unsigned SEW = Info.getSEW();
  unsigned AVLInBits = AVL * SEW;

  unsigned LMul;
  bool Fractional;
  std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());

  if (Fractional)
    return ST.getRealMinVLen() / LMul >= AVLInBits;
  return ST.getRealMinVLen() * LMul >= AVLInBits;
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors.  Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
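/// For example, a vsetvli with a fixed AVL at the head of a single-block loop
/// can be hoisted into the preheader when the preheader is the only
/// predecessor whose exit state doesn't already provide it, so it executes
/// once rather than on every iteration.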
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  const MachineFunction &MF = *MBB.getParent();
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();

  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If VL can be less than AVL, then we can't reduce the frequency of
  // execution.
  if (!hasFixedResult(AvailableInfo, ST))
    return;

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo.  We're looking for two issues here; one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state.  We can
  //    not make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence.  All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE.  Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}

static void doUnion(DemandedFields &A, DemandedFields B) {
  A.VLAny |= B.VLAny;
  A.VLZeroness |= B.VLZeroness;
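  // SEW demands form an ordered lattice (SEWEqual > SEWGreaterThanOrEqual >
  // SEWNone), so taking the max keeps the stricter of the two demands.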
  A.SEW = std::max(A.SEW, B.SEW);
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}

static bool isNonZeroAVL(const MachineOperand &MO) {
  if (MO.isReg())
    return RISCV::X0 == MO.getReg();
  assert(MO.isImm());
  return 0 != MO.getImm();
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used) {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    // TODO: Requires more care in the mutation...
    if (isVLPreservingConfig(PrevMI))
      return false;

    // We don't bother to handle the equally zero case here as it's largely
    // uninteresting.
    if (Used.VLZeroness &&
        (!isNonZeroAVL(MI.getOperand(1)) ||
         !isNonZeroAVL(PrevMI.getOperand(1))))
      return false;

    // TODO: Track whether the register is defined between
    // PrevMI and MI.
    if (MI.getOperand(1).isReg() &&
        RISCV::X0 != MI.getOperand(1).getReg())
      return false;

    // TODO: We need to change the result register to allow this rewrite
    // without the result forming a vl preserving vsetvli which is not
    // a correct state merge.
    if (PrevMI.getOperand(0).getReg() == RISCV::X0 &&
        MI.getOperand(1).isReg())
      return false;
  }

  if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
    return false;

  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr*> ToDelete;
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {

    if (!isVectorConfigInstr(MI)) {
      doUnion(Used, getDemanded(MI, MRI));
      continue;
    }

    Register VRegDef = MI.getOperand(0).getReg();
    if (VRegDef != RISCV::X0 &&
        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        ToDelete.push_back(&MI);
        // Leave NextMI unchanged
        continue;
      } else if (canMutatePriorConfig(MI, *NextMI, Used)) {
        if (!isVLPreservingConfig(*NextMI)) {
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
                                              false);
          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
        // Don't delete a vsetvli if its result might be used.
        Register NextVRefDef = NextMI->getOperand(0).getReg();
        if (NextVRefDef == RISCV::X0 ||
            (NextVRefDef.isVirtual() && MRI->use_nodbg_empty(NextVRefDef)))
          ToDelete.push_back(NextMI);
        // fallthrough
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI, MRI);
  }

  for (auto *MI : ToDelete)
    MI->eraseFromParent();
}

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  if (!ST.hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    HaveVectorOp |= computeVLVTYPEChanges(MBB);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = BBInfo.Change;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block local
  // DSE and peephole based demanded fields based transforms.  Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross block analysis within the dataflow.  We can't have both
  // demanded fields based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    doLocalPostpass(MBB);

  // Once we're fully done rewriting all the instructions, do a final pass
  // through to check for VSETVLIs which write to an unused destination.
  // For the non X0, X0 variant, we can replace the destination register
  // with X0 to reduce register pressure.  This is really a generic
  // optimization which can be applied to any dead def (TODO: generalize).
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
          MI.getOpcode() == RISCV::PseudoVSETIVLI) {
        Register VRegDef = MI.getOperand(0).getReg();
        if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
          MI.getOperand(0).setReg(RISCV::X0);
      }
    }
  }

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace the vl output of the
  // VLEFF/VLSEGFF with it.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}