1 //===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a function pass that inserts VSETVLI instructions where
10 // needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
11 // instructions.
12 //
13 // This pass consists of 3 phases:
14 //
15 // Phase 1 collects how each basic block affects VL/VTYPE.
16 //
17 // Phase 2 uses the information from phase 1 to do a data flow analysis to
18 // propagate the VL/VTYPE changes through the function. This gives us the
19 // VL/VTYPE at the start of each basic block.
20 //
21 // Phase 3 inserts VSETVLI instructions in each basic block. Information from
22 // phase 2 is used to prevent inserting a VSETVLI before the first vector
23 // instruction in the block if possible.
24 //
25 //===----------------------------------------------------------------------===//
26 
27 #include "RISCV.h"
28 #include "RISCVSubtarget.h"
29 #include "llvm/ADT/Statistic.h"
30 #include "llvm/CodeGen/LiveIntervals.h"
31 #include "llvm/CodeGen/MachineFunctionPass.h"
32 #include <queue>
33 using namespace llvm;
34 
// Identifier for this pass; it is handed to INITIALIZE_PASS further down.
#define DEBUG_TYPE "riscv-insert-vsetvli"
// Human-readable pass name, returned by RISCVInsertVSETVLI::getPassName().
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"

// Counters for the number of VSETVL instructions inserted and removed.
STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumRemovedVSETVL, "Number of VSETVL inst removed");

// Hidden command line option, off by default. When set, the pass does not
// look through phis when inserting vsetvlis (see the description string).
static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

// Hidden command line option, on by default, controlling strict assertion
// checking for the dataflow algorithm (see the description string).
static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));
48 
49 namespace {
50 
getVLOpNum(const MachineInstr & MI)51 static unsigned getVLOpNum(const MachineInstr &MI) {
52   return RISCVII::getVLOpNum(MI.getDesc());
53 }
54 
getSEWOpNum(const MachineInstr & MI)55 static unsigned getSEWOpNum(const MachineInstr &MI) {
56   return RISCVII::getSEWOpNum(MI.getDesc());
57 }
58 
isVectorConfigInstr(const MachineInstr & MI)59 static bool isVectorConfigInstr(const MachineInstr &MI) {
60   return MI.getOpcode() == RISCV::PseudoVSETVLI ||
61          MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
62          MI.getOpcode() == RISCV::PseudoVSETIVLI;
63 }
64 
65 /// Return true if this is 'vsetvli x0, x0, vtype' which preserves
66 /// VL and only sets VTYPE.
isVLPreservingConfig(const MachineInstr & MI)67 static bool isVLPreservingConfig(const MachineInstr &MI) {
68   if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
69     return false;
70   assert(RISCV::X0 == MI.getOperand(1).getReg());
71   return RISCV::X0 == MI.getOperand(0).getReg();
72 }
73 
isFloatScalarMoveOrScalarSplatInstr(const MachineInstr & MI)74 static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
75   switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
76   default:
77     return false;
78   case RISCV::VFMV_S_F:
79   case RISCV::VFMV_V_F:
80     return true;
81   }
82 }
83 
isScalarExtractInstr(const MachineInstr & MI)84 static bool isScalarExtractInstr(const MachineInstr &MI) {
85   switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
86   default:
87     return false;
88   case RISCV::VMV_X_S:
89   case RISCV::VFMV_F_S:
90     return true;
91   }
92 }
93 
isScalarInsertInstr(const MachineInstr & MI)94 static bool isScalarInsertInstr(const MachineInstr &MI) {
95   switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
96   default:
97     return false;
98   case RISCV::VMV_S_X:
99   case RISCV::VFMV_S_F:
100     return true;
101   }
102 }
103 
isScalarSplatInstr(const MachineInstr & MI)104 static bool isScalarSplatInstr(const MachineInstr &MI) {
105   switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
106   default:
107     return false;
108   case RISCV::VMV_V_I:
109   case RISCV::VMV_V_X:
110   case RISCV::VFMV_V_F:
111     return true;
112   }
113 }
114 
isVSlideInstr(const MachineInstr & MI)115 static bool isVSlideInstr(const MachineInstr &MI) {
116   switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
117   default:
118     return false;
119   case RISCV::VSLIDEDOWN_VX:
120   case RISCV::VSLIDEDOWN_VI:
121   case RISCV::VSLIDEUP_VX:
122   case RISCV::VSLIDEUP_VI:
123     return true;
124   }
125 }
126 
127 /// Get the EEW for a load or store instruction.  Return std::nullopt if MI is
128 /// not a load or store which ignores SEW.
getEEWForLoadStore(const MachineInstr & MI)129 static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
130   switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
131   default:
132     return std::nullopt;
133   case RISCV::VLE8_V:
134   case RISCV::VLSE8_V:
135   case RISCV::VSE8_V:
136   case RISCV::VSSE8_V:
137     return 8;
138   case RISCV::VLE16_V:
139   case RISCV::VLSE16_V:
140   case RISCV::VSE16_V:
141   case RISCV::VSSE16_V:
142     return 16;
143   case RISCV::VLE32_V:
144   case RISCV::VLSE32_V:
145   case RISCV::VSE32_V:
146   case RISCV::VSSE32_V:
147     return 32;
148   case RISCV::VLE64_V:
149   case RISCV::VLSE64_V:
150   case RISCV::VSE64_V:
151   case RISCV::VSSE64_V:
152     return 64;
153   }
154 }
155 
isNonZeroLoadImmediate(MachineInstr & MI)156 static bool isNonZeroLoadImmediate(MachineInstr &MI) {
157   return MI.getOpcode() == RISCV::ADDI &&
158     MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
159     MI.getOperand(1).getReg() == RISCV::X0 &&
160     MI.getOperand(2).getImm() != 0;
161 }
162 
163 /// Return true if this is an operation on mask registers.  Note that
164 /// this includes both arithmetic/logical ops and load/store (vlm/vsm).
isMaskRegOp(const MachineInstr & MI)165 static bool isMaskRegOp(const MachineInstr &MI) {
166   if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
167     return false;
168   const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
169   // A Log2SEW of 0 is an operation on mask registers only.
170   return Log2SEW == 0;
171 }
172 
173 /// Return true if the inactive elements in the result are entirely undefined.
174 /// Note that this is different from "agnostic" as defined by the vector
175 /// specification.  Agnostic requires each lane to either be undisturbed, or
176 /// take the value -1; no other value is allowed.
hasUndefinedMergeOp(const MachineInstr & MI,const MachineRegisterInfo & MRI)177 static bool hasUndefinedMergeOp(const MachineInstr &MI,
178                                 const MachineRegisterInfo &MRI) {
179 
180   unsigned UseOpIdx;
181   if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
182     // If there is no passthrough operand, then the pass through
183     // lanes are undefined.
184     return true;
185 
186   // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQEUENCE whose
187   // operands are solely IMPLICIT_DEFS, then the pass through lanes are
188   // undefined.
189   const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
190   if (UseMO.getReg() == RISCV::NoRegister)
191     return true;
192 
193   if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
194     if (UseMI->isImplicitDef())
195       return true;
196 
197     if (UseMI->isRegSequence()) {
198       for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
199         MachineInstr *SourceMI = MRI.getVRegDef(UseMI->getOperand(i).getReg());
200         if (!SourceMI || !SourceMI->isImplicitDef())
201           return false;
202       }
203       return true;
204     }
205   }
206   return false;
207 }
208 
209 /// Which subfields of VL or VTYPE have values we need to preserve?
210 struct DemandedFields {
211   // Some unknown property of VL is used.  If demanded, must preserve entire
212   // value.
213   bool VLAny = false;
214   // Only zero vs non-zero is used. If demanded, can change non-zero values.
215   bool VLZeroness = false;
216   // What properties of SEW we need to preserve.
217   enum : uint8_t {
218     SEWEqual = 3,              // The exact value of SEW needs to be preserved.
219     SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
220                                // than or equal to the original value.
221     SEWGreaterThanOrEqualAndLessThan64 =
222         1,      // SEW can be changed as long as it's greater
223                 // than or equal to the original value, but must be less
224                 // than 64.
225     SEWNone = 0 // We don't need to preserve SEW at all.
226   } SEW = SEWNone;
227   bool LMUL = false;
228   bool SEWLMULRatio = false;
229   bool TailPolicy = false;
230   bool MaskPolicy = false;
231 
232   // Return true if any part of VTYPE was used
usedVTYPE__anon5ac2218d0111::DemandedFields233   bool usedVTYPE() const {
234     return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
235   }
236 
237   // Return true if any property of VL was used
usedVL__anon5ac2218d0111::DemandedFields238   bool usedVL() {
239     return VLAny || VLZeroness;
240   }
241 
242   // Mark all VTYPE subfields and properties as demanded
demandVTYPE__anon5ac2218d0111::DemandedFields243   void demandVTYPE() {
244     SEW = SEWEqual;
245     LMUL = true;
246     SEWLMULRatio = true;
247     TailPolicy = true;
248     MaskPolicy = true;
249   }
250 
251   // Mark all VL properties as demanded
demandVL__anon5ac2218d0111::DemandedFields252   void demandVL() {
253     VLAny = true;
254     VLZeroness = true;
255   }
256 
257 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
258   /// Support for debugging, callable in GDB: V->dump()
dump__anon5ac2218d0111::DemandedFields259   LLVM_DUMP_METHOD void dump() const {
260     print(dbgs());
261     dbgs() << "\n";
262   }
263 
264   /// Implement operator<<.
print__anon5ac2218d0111::DemandedFields265   void print(raw_ostream &OS) const {
266     OS << "{";
267     OS << "VLAny=" << VLAny << ", ";
268     OS << "VLZeroness=" << VLZeroness << ", ";
269     OS << "SEW=";
270     switch (SEW) {
271     case SEWEqual:
272       OS << "SEWEqual";
273       break;
274     case SEWGreaterThanOrEqual:
275       OS << "SEWGreaterThanOrEqual";
276       break;
277     case SEWGreaterThanOrEqualAndLessThan64:
278       OS << "SEWGreaterThanOrEqualAndLessThan64";
279       break;
280     case SEWNone:
281       OS << "SEWNone";
282       break;
283     };
284     OS << ", ";
285     OS << "LMUL=" << LMUL << ", ";
286     OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
287     OS << "TailPolicy=" << TailPolicy << ", ";
288     OS << "MaskPolicy=" << MaskPolicy;
289     OS << "}";
290   }
291 #endif
292 };
293 
294 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
295 LLVM_ATTRIBUTE_USED
operator <<(raw_ostream & OS,const DemandedFields & DF)296 inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
297   DF.print(OS);
298   return OS;
299 }
300 #endif
301 
302 /// Return true if moving from CurVType to NewVType is
303 /// indistinguishable from the perspective of an instruction (or set
304 /// of instructions) which use only the Used subfields and properties.
areCompatibleVTYPEs(uint64_t CurVType,uint64_t NewVType,const DemandedFields & Used)305 static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
306                                 const DemandedFields &Used) {
307   switch (Used.SEW) {
308   case DemandedFields::SEWNone:
309     break;
310   case DemandedFields::SEWEqual:
311     if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
312       return false;
313     break;
314   case DemandedFields::SEWGreaterThanOrEqual:
315     if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
316       return false;
317     break;
318   case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
319     if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
320         RISCVVType::getSEW(NewVType) >= 64)
321       return false;
322     break;
323   }
324 
325   if (Used.LMUL &&
326       RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
327     return false;
328 
329   if (Used.SEWLMULRatio) {
330     auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
331                                               RISCVVType::getVLMUL(CurVType));
332     auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
333                                               RISCVVType::getVLMUL(NewVType));
334     if (Ratio1 != Ratio2)
335       return false;
336   }
337 
338   if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
339                              RISCVVType::isTailAgnostic(NewVType))
340     return false;
341   if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
342                              RISCVVType::isMaskAgnostic(NewVType))
343     return false;
344   return true;
345 }
346 
347 /// Return the fields and properties demanded by the provided instruction.
getDemanded(const MachineInstr & MI,const MachineRegisterInfo * MRI,const RISCVSubtarget * ST)348 DemandedFields getDemanded(const MachineInstr &MI,
349                            const MachineRegisterInfo *MRI,
350                            const RISCVSubtarget *ST) {
351   // Warning: This function has to work on both the lowered (i.e. post
352   // emitVSETVLIs) and pre-lowering forms.  The main implication of this is
353   // that it can't use the value of a SEW, VL, or Policy operand as they might
354   // be stale after lowering.
355 
356   // Most instructions don't use any of these subfeilds.
357   DemandedFields Res;
358   // Start conservative if registers are used
359   if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
360     Res.demandVL();
361   if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
362     Res.demandVTYPE();
363   // Start conservative on the unlowered form too
364   uint64_t TSFlags = MI.getDesc().TSFlags;
365   if (RISCVII::hasSEWOp(TSFlags)) {
366     Res.demandVTYPE();
367     if (RISCVII::hasVLOp(TSFlags))
368       Res.demandVL();
369 
370     // Behavior is independent of mask policy.
371     if (!RISCVII::usesMaskPolicy(TSFlags))
372       Res.MaskPolicy = false;
373   }
374 
375   // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
376   // They instead demand the ratio of the two which is used in computing
377   // EMUL, but which allows us the flexibility to change SEW and LMUL
378   // provided we don't change the ratio.
379   // Note: We assume that the instructions initial SEW is the EEW encoded
380   // in the opcode.  This is asserted when constructing the VSETVLIInfo.
381   if (getEEWForLoadStore(MI)) {
382     Res.SEW = DemandedFields::SEWNone;
383     Res.LMUL = false;
384   }
385 
386   // Store instructions don't use the policy fields.
387   if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
388     Res.TailPolicy = false;
389     Res.MaskPolicy = false;
390   }
391 
392   // If this is a mask reg operation, it only cares about VLMAX.
393   // TODO: Possible extensions to this logic
394   // * Probably ok if available VLMax is larger than demanded
395   // * The policy bits can probably be ignored..
396   if (isMaskRegOp(MI)) {
397     Res.SEW = DemandedFields::SEWNone;
398     Res.LMUL = false;
399   }
400 
401   // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
402   if (isScalarInsertInstr(MI)) {
403     Res.LMUL = false;
404     Res.SEWLMULRatio = false;
405     Res.VLAny = false;
406     // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
407     // need to preserve any other bits and are thus compatible with any larger,
408     // etype and can disregard policy bits.  Warning: It's tempting to try doing
409     // this for any tail agnostic operation, but we can't as TA requires
410     // tail lanes to either be the original value or -1.  We are writing
411     // unknown bits to the lanes here.
412     if (hasUndefinedMergeOp(MI, *MRI)) {
413       if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
414         Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
415       else
416         Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
417       Res.TailPolicy = false;
418     }
419   }
420 
421   // vmv.x.s, and vmv.f.s are unconditional and ignore everything except SEW.
422   if (isScalarExtractInstr(MI)) {
423     assert(!RISCVII::hasVLOp(TSFlags));
424     Res.LMUL = false;
425     Res.SEWLMULRatio = false;
426     Res.TailPolicy = false;
427     Res.MaskPolicy = false;
428   }
429 
430   return Res;
431 }
432 
433 /// Defines the abstract state with which the forward dataflow models the
434 /// values of the VL and VTYPE registers after insertion.
435 class VSETVLIInfo {
436   union {
437     Register AVLReg;
438     unsigned AVLImm;
439   };
440 
441   enum : uint8_t {
442     Uninitialized,
443     AVLIsReg,
444     AVLIsImm,
445     Unknown,
446   } State = Uninitialized;
447 
448   // Fields from VTYPE.
449   RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
450   uint8_t SEW = 0;
451   uint8_t TailAgnostic : 1;
452   uint8_t MaskAgnostic : 1;
453   uint8_t SEWLMULRatioOnly : 1;
454 
455 public:
VSETVLIInfo()456   VSETVLIInfo()
457       : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
458         SEWLMULRatioOnly(false) {}
459 
getUnknown()460   static VSETVLIInfo getUnknown() {
461     VSETVLIInfo Info;
462     Info.setUnknown();
463     return Info;
464   }
465 
isValid() const466   bool isValid() const { return State != Uninitialized; }
setUnknown()467   void setUnknown() { State = Unknown; }
isUnknown() const468   bool isUnknown() const { return State == Unknown; }
469 
setAVLReg(Register Reg)470   void setAVLReg(Register Reg) {
471     AVLReg = Reg;
472     State = AVLIsReg;
473   }
474 
setAVLImm(unsigned Imm)475   void setAVLImm(unsigned Imm) {
476     AVLImm = Imm;
477     State = AVLIsImm;
478   }
479 
hasAVLImm() const480   bool hasAVLImm() const { return State == AVLIsImm; }
hasAVLReg() const481   bool hasAVLReg() const { return State == AVLIsReg; }
getAVLReg() const482   Register getAVLReg() const {
483     assert(hasAVLReg());
484     return AVLReg;
485   }
getAVLImm() const486   unsigned getAVLImm() const {
487     assert(hasAVLImm());
488     return AVLImm;
489   }
490 
setAVL(VSETVLIInfo Info)491   void setAVL(VSETVLIInfo Info) {
492     assert(Info.isValid());
493     if (Info.isUnknown())
494       setUnknown();
495     else if (Info.hasAVLReg())
496       setAVLReg(Info.getAVLReg());
497     else {
498       assert(Info.hasAVLImm());
499       setAVLImm(Info.getAVLImm());
500     }
501   }
502 
getSEW() const503   unsigned getSEW() const { return SEW; }
getVLMUL() const504   RISCVII::VLMUL getVLMUL() const { return VLMul; }
getTailAgnostic() const505   bool getTailAgnostic() const { return TailAgnostic; }
getMaskAgnostic() const506   bool getMaskAgnostic() const { return MaskAgnostic; }
507 
hasNonZeroAVL(const MachineRegisterInfo & MRI) const508   bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const {
509     if (hasAVLImm())
510       return getAVLImm() > 0;
511     if (hasAVLReg()) {
512       if (getAVLReg() == RISCV::X0)
513         return true;
514       if (MachineInstr *MI = MRI.getVRegDef(getAVLReg());
515           MI && isNonZeroLoadImmediate(*MI))
516         return true;
517       return false;
518     }
519     return false;
520   }
521 
hasEquallyZeroAVL(const VSETVLIInfo & Other,const MachineRegisterInfo & MRI) const522   bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
523                          const MachineRegisterInfo &MRI) const {
524     if (hasSameAVL(Other))
525       return true;
526     return (hasNonZeroAVL(MRI) && Other.hasNonZeroAVL(MRI));
527   }
528 
hasSameAVL(const VSETVLIInfo & Other) const529   bool hasSameAVL(const VSETVLIInfo &Other) const {
530     if (hasAVLReg() && Other.hasAVLReg())
531       return getAVLReg() == Other.getAVLReg();
532 
533     if (hasAVLImm() && Other.hasAVLImm())
534       return getAVLImm() == Other.getAVLImm();
535 
536     return false;
537   }
538 
setVTYPE(unsigned VType)539   void setVTYPE(unsigned VType) {
540     assert(isValid() && !isUnknown() &&
541            "Can't set VTYPE for uninitialized or unknown");
542     VLMul = RISCVVType::getVLMUL(VType);
543     SEW = RISCVVType::getSEW(VType);
544     TailAgnostic = RISCVVType::isTailAgnostic(VType);
545     MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
546   }
setVTYPE(RISCVII::VLMUL L,unsigned S,bool TA,bool MA)547   void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
548     assert(isValid() && !isUnknown() &&
549            "Can't set VTYPE for uninitialized or unknown");
550     VLMul = L;
551     SEW = S;
552     TailAgnostic = TA;
553     MaskAgnostic = MA;
554   }
555 
setVLMul(RISCVII::VLMUL VLMul)556   void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }
557 
encodeVTYPE() const558   unsigned encodeVTYPE() const {
559     assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
560            "Can't encode VTYPE for uninitialized or unknown");
561     return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
562   }
563 
hasSEWLMULRatioOnly() const564   bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
565 
hasSameVTYPE(const VSETVLIInfo & Other) const566   bool hasSameVTYPE(const VSETVLIInfo &Other) const {
567     assert(isValid() && Other.isValid() &&
568            "Can't compare invalid VSETVLIInfos");
569     assert(!isUnknown() && !Other.isUnknown() &&
570            "Can't compare VTYPE in unknown state");
571     assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
572            "Can't compare when only LMUL/SEW ratio is valid.");
573     return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
574            std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
575                     Other.MaskAgnostic);
576   }
577 
getSEWLMULRatio() const578   unsigned getSEWLMULRatio() const {
579     assert(isValid() && !isUnknown() &&
580            "Can't use VTYPE for uninitialized or unknown");
581     return RISCVVType::getSEWLMULRatio(SEW, VLMul);
582   }
583 
584   // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
585   // Note that having the same VLMAX ensures that both share the same
586   // function from AVL to VL; that is, they must produce the same VL value
587   // for any given AVL value.
hasSameVLMAX(const VSETVLIInfo & Other) const588   bool hasSameVLMAX(const VSETVLIInfo &Other) const {
589     assert(isValid() && Other.isValid() &&
590            "Can't compare invalid VSETVLIInfos");
591     assert(!isUnknown() && !Other.isUnknown() &&
592            "Can't compare VTYPE in unknown state");
593     return getSEWLMULRatio() == Other.getSEWLMULRatio();
594   }
595 
hasCompatibleVTYPE(const DemandedFields & Used,const VSETVLIInfo & Require) const596   bool hasCompatibleVTYPE(const DemandedFields &Used,
597                           const VSETVLIInfo &Require) const {
598     return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
599   }
600 
601   // Determine whether the vector instructions requirements represented by
602   // Require are compatible with the previous vsetvli instruction represented
603   // by this.  MI is the instruction whose requirements we're considering.
isCompatible(const DemandedFields & Used,const VSETVLIInfo & Require,const MachineRegisterInfo & MRI) const604   bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
605                     const MachineRegisterInfo &MRI) const {
606     assert(isValid() && Require.isValid() &&
607            "Can't compare invalid VSETVLIInfos");
608     assert(!Require.SEWLMULRatioOnly &&
609            "Expected a valid VTYPE for instruction!");
610     // Nothing is compatible with Unknown.
611     if (isUnknown() || Require.isUnknown())
612       return false;
613 
614     // If only our VLMAX ratio is valid, then this isn't compatible.
615     if (SEWLMULRatioOnly)
616       return false;
617 
618     if (Used.VLAny && !hasSameAVL(Require))
619       return false;
620 
621     if (Used.VLZeroness && !hasEquallyZeroAVL(Require, MRI))
622       return false;
623 
624     return hasCompatibleVTYPE(Used, Require);
625   }
626 
operator ==(const VSETVLIInfo & Other) const627   bool operator==(const VSETVLIInfo &Other) const {
628     // Uninitialized is only equal to another Uninitialized.
629     if (!isValid())
630       return !Other.isValid();
631     if (!Other.isValid())
632       return !isValid();
633 
634     // Unknown is only equal to another Unknown.
635     if (isUnknown())
636       return Other.isUnknown();
637     if (Other.isUnknown())
638       return isUnknown();
639 
640     if (!hasSameAVL(Other))
641       return false;
642 
643     // If the SEWLMULRatioOnly bits are different, then they aren't equal.
644     if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
645       return false;
646 
647     // If only the VLMAX is valid, check that it is the same.
648     if (SEWLMULRatioOnly)
649       return hasSameVLMAX(Other);
650 
651     // If the full VTYPE is valid, check that it is the same.
652     return hasSameVTYPE(Other);
653   }
654 
operator !=(const VSETVLIInfo & Other) const655   bool operator!=(const VSETVLIInfo &Other) const {
656     return !(*this == Other);
657   }
658 
659   // Calculate the VSETVLIInfo visible to a block assuming this and Other are
660   // both predecessors.
intersect(const VSETVLIInfo & Other) const661   VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
662     // If the new value isn't valid, ignore it.
663     if (!Other.isValid())
664       return *this;
665 
666     // If this value isn't valid, this must be the first predecessor, use it.
667     if (!isValid())
668       return Other;
669 
670     // If either is unknown, the result is unknown.
671     if (isUnknown() || Other.isUnknown())
672       return VSETVLIInfo::getUnknown();
673 
674     // If we have an exact, match return this.
675     if (*this == Other)
676       return *this;
677 
678     // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
679     // return an SEW/LMUL ratio only value.
680     if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
681       VSETVLIInfo MergeInfo = *this;
682       MergeInfo.SEWLMULRatioOnly = true;
683       return MergeInfo;
684     }
685 
686     // Otherwise the result is unknown.
687     return VSETVLIInfo::getUnknown();
688   }
689 
690 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
691   /// Support for debugging, callable in GDB: V->dump()
dump() const692   LLVM_DUMP_METHOD void dump() const {
693     print(dbgs());
694     dbgs() << "\n";
695   }
696 
697   /// Implement operator<<.
698   /// @{
print(raw_ostream & OS) const699   void print(raw_ostream &OS) const {
700     OS << "{";
701     if (!isValid())
702       OS << "Uninitialized";
703     if (isUnknown())
704       OS << "unknown";
705     if (hasAVLReg())
706       OS << "AVLReg=" << (unsigned)AVLReg;
707     if (hasAVLImm())
708       OS << "AVLImm=" << (unsigned)AVLImm;
709     OS << ", "
710        << "VLMul=" << (unsigned)VLMul << ", "
711        << "SEW=" << (unsigned)SEW << ", "
712        << "TailAgnostic=" << (bool)TailAgnostic << ", "
713        << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
714        << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
715   }
716 #endif
717 };
718 
719 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
720 LLVM_ATTRIBUTE_USED
operator <<(raw_ostream & OS,const VSETVLIInfo & V)721 inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
722   V.print(OS);
723   return OS;
724 }
725 #endif
726 
/// Dataflow state kept for a basic block.  Both VSETVLIInfo members are
/// default-constructed, and InQueue starts out false.
struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};
741 
742 class RISCVInsertVSETVLI : public MachineFunctionPass {
743   const RISCVSubtarget *ST;
744   const TargetInstrInfo *TII;
745   MachineRegisterInfo *MRI;
746 
747   std::vector<BlockData> BlockInfo;
748   std::queue<const MachineBasicBlock *> WorkList;
749 
750 public:
751   static char ID;
752 
RISCVInsertVSETVLI()753   RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
754   bool runOnMachineFunction(MachineFunction &MF) override;
755 
getAnalysisUsage(AnalysisUsage & AU) const756   void getAnalysisUsage(AnalysisUsage &AU) const override {
757     AU.setPreservesCFG();
758     MachineFunctionPass::getAnalysisUsage(AU);
759   }
760 
getPassName() const761   StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }
762 
763 private:
764   bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
765                    const VSETVLIInfo &CurInfo) const;
766   bool needVSETVLIPHI(const VSETVLIInfo &Require,
767                       const MachineBasicBlock &MBB) const;
768   void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
769                      const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
770   void insertVSETVLI(MachineBasicBlock &MBB,
771                      MachineBasicBlock::iterator InsertPt, DebugLoc DL,
772                      const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
773 
774   void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
775   void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
776   bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
777                              VSETVLIInfo &Info) const;
778   void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
779   void emitVSETVLIs(MachineBasicBlock &MBB);
780   void doLocalPostpass(MachineBasicBlock &MBB);
781   void doPRE(MachineBasicBlock &MBB);
782   void insertReadVL(MachineBasicBlock &MBB);
783 };
784 
785 } // end anonymous namespace
786 
// Storage for the pass ID that is handed to the MachineFunctionPass
// constructor.
char RISCVInsertVSETVLI::ID = 0;

// Pass initialization boilerplate, using DEBUG_TYPE as the pass argument and
// RISCV_INSERT_VSETVLI_NAME as its name.
INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)
791 
792 // Return a VSETVLIInfo representing the changes made by this VSETVLI or
793 // VSETIVLI instruction.
794 static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
795   VSETVLIInfo NewInfo;
796   if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
797     NewInfo.setAVLImm(MI.getOperand(1).getImm());
798   } else {
799     assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
800            MI.getOpcode() == RISCV::PseudoVSETVLIX0);
801     Register AVLReg = MI.getOperand(1).getReg();
802     assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
803            "Can't handle X0, X0 vsetvli yet");
804     NewInfo.setAVLReg(AVLReg);
805   }
806   NewInfo.setVTYPE(MI.getOperand(2).getImm());
807 
808   return NewInfo;
809 }
810 
computeVLMAX(unsigned VLEN,unsigned SEW,RISCVII::VLMUL VLMul)811 static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
812                              RISCVII::VLMUL VLMul) {
813   auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
814   if (Fractional)
815     VLEN = VLEN / LMul;
816   else
817     VLEN = VLEN * LMul;
818   return VLEN/SEW;
819 }
820 
computeInfoForInstr(const MachineInstr & MI,uint64_t TSFlags,const RISCVSubtarget & ST,const MachineRegisterInfo * MRI)821 static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
822                                        const RISCVSubtarget &ST,
823                                        const MachineRegisterInfo *MRI) {
824   VSETVLIInfo InstrInfo;
825 
826   bool TailAgnostic = true;
827   bool MaskAgnostic = true;
828   if (!hasUndefinedMergeOp(MI, *MRI)) {
829     // Start with undisturbed.
830     TailAgnostic = false;
831     MaskAgnostic = false;
832 
833     // If there is a policy operand, use it.
834     if (RISCVII::hasVecPolicyOp(TSFlags)) {
835       const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
836       uint64_t Policy = Op.getImm();
837       assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
838              "Invalid Policy Value");
839       TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
840       MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
841     }
842 
843     // Some pseudo instructions force a tail agnostic policy despite having a
844     // tied def.
845     if (RISCVII::doesForceTailAgnostic(TSFlags))
846       TailAgnostic = true;
847 
848     if (!RISCVII::usesMaskPolicy(TSFlags))
849       MaskAgnostic = true;
850   }
851 
852   RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
853 
854   unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
855   // A Log2SEW of 0 is an operation on mask registers only.
856   unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
857   assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
858 
859   if (RISCVII::hasVLOp(TSFlags)) {
860     const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
861     if (VLOp.isImm()) {
862       int64_t Imm = VLOp.getImm();
863       // Conver the VLMax sentintel to X0 register.
864       if (Imm == RISCV::VLMaxSentinel) {
865         // If we know the exact VLEN, see if we can use the constant encoding
866         // for the VLMAX instead.  This reduces register pressure slightly.
867         const unsigned VLMAX = computeVLMAX(ST.getRealMaxVLen(), SEW, VLMul);
868         if (ST.getRealMinVLen() == ST.getRealMaxVLen() && VLMAX <= 31)
869           InstrInfo.setAVLImm(VLMAX);
870         else
871           InstrInfo.setAVLReg(RISCV::X0);
872       }
873       else
874         InstrInfo.setAVLImm(Imm);
875     } else {
876       InstrInfo.setAVLReg(VLOp.getReg());
877     }
878   } else {
879     assert(isScalarExtractInstr(MI));
880     InstrInfo.setAVLReg(RISCV::NoRegister);
881   }
882 #ifndef NDEBUG
883   if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
884     assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
885   }
886 #endif
887   InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
888 
889   // If AVL is defined by a vsetvli with the same VLMAX, we can replace the
890   // AVL operand with the AVL of the defining vsetvli.  We avoid general
891   // register AVLs to avoid extending live ranges without being sure we can
892   // kill the original source reg entirely.
893   if (InstrInfo.hasAVLReg() && InstrInfo.getAVLReg().isVirtual()) {
894     MachineInstr *DefMI = MRI->getVRegDef(InstrInfo.getAVLReg());
895     if (DefMI && isVectorConfigInstr(*DefMI)) {
896       VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
897       if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
898           (DefInstrInfo.hasAVLImm() || DefInstrInfo.getAVLReg() == RISCV::X0)) {
899         InstrInfo.setAVL(DefInstrInfo);
900       }
901     }
902   }
903 
904   return InstrInfo;
905 }
906 
insertVSETVLI(MachineBasicBlock & MBB,MachineInstr & MI,const VSETVLIInfo & Info,const VSETVLIInfo & PrevInfo)907 void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
908                                        const VSETVLIInfo &Info,
909                                        const VSETVLIInfo &PrevInfo) {
910   DebugLoc DL = MI.getDebugLoc();
911   insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
912 }
913 
insertVSETVLI(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertPt,DebugLoc DL,const VSETVLIInfo & Info,const VSETVLIInfo & PrevInfo)914 void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
915                      MachineBasicBlock::iterator InsertPt, DebugLoc DL,
916                      const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
917 
918   ++NumInsertedVSETVL;
919   if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
920     // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
921     // VLMAX.
922     if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
923       BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
924           .addReg(RISCV::X0, RegState::Define | RegState::Dead)
925           .addReg(RISCV::X0, RegState::Kill)
926           .addImm(Info.encodeVTYPE())
927           .addReg(RISCV::VL, RegState::Implicit);
928       return;
929     }
930 
931     // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
932     // it has the same VLMAX we want and the last VL/VTYPE we observed is the
933     // same, we can use the X0, X0 form.
934     if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg() &&
935         Info.getAVLReg().isVirtual()) {
936       if (MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg())) {
937         if (isVectorConfigInstr(*DefMI)) {
938           VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
939           if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
940             BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
941                 .addReg(RISCV::X0, RegState::Define | RegState::Dead)
942                 .addReg(RISCV::X0, RegState::Kill)
943                 .addImm(Info.encodeVTYPE())
944                 .addReg(RISCV::VL, RegState::Implicit);
945             return;
946           }
947         }
948       }
949     }
950   }
951 
952   if (Info.hasAVLImm()) {
953     BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
954         .addReg(RISCV::X0, RegState::Define | RegState::Dead)
955         .addImm(Info.getAVLImm())
956         .addImm(Info.encodeVTYPE());
957     return;
958   }
959 
960   Register AVLReg = Info.getAVLReg();
961   if (AVLReg == RISCV::NoRegister) {
962     // We can only use x0, x0 if there's no chance of the vtype change causing
963     // the previous vl to become invalid.
964     if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
965         Info.hasSameVLMAX(PrevInfo)) {
966       BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
967           .addReg(RISCV::X0, RegState::Define | RegState::Dead)
968           .addReg(RISCV::X0, RegState::Kill)
969           .addImm(Info.encodeVTYPE())
970           .addReg(RISCV::VL, RegState::Implicit);
971       return;
972     }
973     // Otherwise use an AVL of 1 to avoid depending on previous vl.
974     BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
975         .addReg(RISCV::X0, RegState::Define | RegState::Dead)
976         .addImm(1)
977         .addImm(Info.encodeVTYPE());
978     return;
979   }
980 
981   if (AVLReg.isVirtual())
982     MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
983 
984   // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
985   // opcode if the AVLReg is X0 as they have different register classes for
986   // the AVL operand.
987   Register DestReg = RISCV::X0;
988   unsigned Opcode = RISCV::PseudoVSETVLI;
989   if (AVLReg == RISCV::X0) {
990     DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
991     Opcode = RISCV::PseudoVSETVLIX0;
992   }
993   BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
994       .addReg(DestReg, RegState::Define | RegState::Dead)
995       .addReg(AVLReg)
996       .addImm(Info.encodeVTYPE());
997 }
998 
isLMUL1OrSmaller(RISCVII::VLMUL LMUL)999 static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
1000   auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
1001   return Fractional || LMul == 1;
1002 }
1003 
1004 /// Return true if a VSETVLI is required to transition from CurInfo to Require
1005 /// before MI.
needVSETVLI(const MachineInstr & MI,const VSETVLIInfo & Require,const VSETVLIInfo & CurInfo) const1006 bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
1007                                      const VSETVLIInfo &Require,
1008                                      const VSETVLIInfo &CurInfo) const {
1009   assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, MRI));
1010 
1011   if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
1012     return true;
1013 
1014   DemandedFields Used = getDemanded(MI, MRI, ST);
1015 
1016   // A slidedown/slideup with an *undefined* merge op can freely clobber
1017   // elements not copied from the source vector (e.g. masked off, tail, or
1018   // slideup's prefix). Notes:
1019   // * We can't modify SEW here since the slide amount is in units of SEW.
1020   // * VL=1 is special only because we have existing support for zero vs
1021   //   non-zero VL.  We could generalize this if we had a VL > C predicate.
1022   // * The LMUL1 restriction is for machines whose latency may depend on VL.
1023   // * As above, this is only legal for tail "undefined" not "agnostic".
1024   if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
1025       isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
1026     Used.VLAny = false;
1027     Used.VLZeroness = true;
1028     Used.LMUL = false;
1029     Used.TailPolicy = false;
1030   }
1031 
1032   // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated in the same
1033   // semantically as vmv.s.x.  This is particularly useful since we don't have an
1034   // immediate form of vmv.s.x, and thus frequently use vmv.v.i in it's place.
1035   // Since a splat is non-constant time in LMUL, we do need to be careful to not
1036   // increase the number of active vector registers (unlike for vmv.s.x.)
1037   if (isScalarSplatInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
1038       isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
1039     Used.LMUL = false;
1040     Used.SEWLMULRatio = false;
1041     Used.VLAny = false;
1042     if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
1043       Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
1044     else
1045       Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
1046     Used.TailPolicy = false;
1047   }
1048 
1049   if (CurInfo.isCompatible(Used, Require, *MRI))
1050     return false;
1051 
1052   // We didn't find a compatible value. If our AVL is a virtual register,
1053   // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
1054   // and the last VL/VTYPE we observed is the same, we don't need a
1055   // VSETVLI here.
1056   if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
1057       CurInfo.hasCompatibleVTYPE(Used, Require)) {
1058     if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
1059       if (isVectorConfigInstr(*DefMI)) {
1060         VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
1061         if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
1062           return false;
1063       }
1064     }
1065   }
1066 
1067   return true;
1068 }
1069 
1070 // If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
1071 // maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
1072 // places.
adjustIncoming(VSETVLIInfo PrevInfo,VSETVLIInfo NewInfo,DemandedFields & Demanded)1073 static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
1074                                   DemandedFields &Demanded) {
1075   VSETVLIInfo Info = NewInfo;
1076 
1077   if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
1078       !PrevInfo.isUnknown()) {
1079     if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
1080             PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
1081       Info.setVLMul(*NewVLMul);
1082     Demanded.LMUL = true;
1083   }
1084 
1085   return Info;
1086 }
1087 
1088 // Given an incoming state reaching MI, minimally modifies that state so that it
1089 // is compatible with MI. The resulting state is guaranteed to be semantically
1090 // legal for MI, but may not be the state requested by MI.
transferBefore(VSETVLIInfo & Info,const MachineInstr & MI) const1091 void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
1092                                         const MachineInstr &MI) const {
1093   uint64_t TSFlags = MI.getDesc().TSFlags;
1094   if (!RISCVII::hasSEWOp(TSFlags))
1095     return;
1096 
1097   const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, MRI);
1098   assert(NewInfo.isValid() && !NewInfo.isUnknown());
1099   if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
1100     return;
1101 
1102   const VSETVLIInfo PrevInfo = Info;
1103   if (!Info.isValid() || Info.isUnknown())
1104     Info = NewInfo;
1105 
1106   DemandedFields Demanded = getDemanded(MI, MRI, ST);
1107   const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);
1108 
1109   // If MI only demands that VL has the same zeroness, we only need to set the
1110   // AVL if the zeroness differs.  This removes a vsetvli entirely if the types
1111   // match or allows use of cheaper avl preserving variant if VLMAX doesn't
1112   // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype"
1113   // variant, so we avoid the transform to prevent extending live range of an
1114   // avl register operand.
1115   // TODO: We can probably relax this for immediates.
1116   bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, *MRI) &&
1117                      IncomingInfo.hasSameVLMAX(PrevInfo);
1118   if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
1119     Info.setAVL(IncomingInfo);
1120 
1121   Info.setVTYPE(
1122       ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
1123           .getVLMUL(),
1124       ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
1125       // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
1126       // if needed.
1127       (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
1128           IncomingInfo.getTailAgnostic(),
1129       (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
1130           IncomingInfo.getMaskAgnostic());
1131 
1132   // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
1133   // the AVL.
1134   if (Info.hasSEWLMULRatioOnly()) {
1135     VSETVLIInfo RatiolessInfo = IncomingInfo;
1136     RatiolessInfo.setAVL(Info);
1137     Info = RatiolessInfo;
1138   }
1139 }
1140 
1141 // Given a state with which we evaluated MI (see transferBefore above for why
1142 // this might be different that the state MI requested), modify the state to
1143 // reflect the changes MI might make.
transferAfter(VSETVLIInfo & Info,const MachineInstr & MI) const1144 void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
1145                                        const MachineInstr &MI) const {
1146   if (isVectorConfigInstr(MI)) {
1147     Info = getInfoForVSETVLI(MI);
1148     return;
1149   }
1150 
1151   if (RISCV::isFaultFirstLoad(MI)) {
1152     // Update AVL to vl-output of the fault first load.
1153     Info.setAVLReg(MI.getOperand(1).getReg());
1154     return;
1155   }
1156 
1157   // If this is something that updates VL/VTYPE that we don't know about, set
1158   // the state to unknown.
1159   if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
1160       MI.modifiesRegister(RISCV::VTYPE))
1161     Info = VSETVLIInfo::getUnknown();
1162 }
1163 
computeVLVTYPEChanges(const MachineBasicBlock & MBB,VSETVLIInfo & Info) const1164 bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
1165                                                VSETVLIInfo &Info) const {
1166   bool HadVectorOp = false;
1167 
1168   Info = BlockInfo[MBB.getNumber()].Pred;
1169   for (const MachineInstr &MI : MBB) {
1170     transferBefore(Info, MI);
1171 
1172     if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
1173       HadVectorOp = true;
1174 
1175     transferAfter(Info, MI);
1176   }
1177 
1178   return HadVectorOp;
1179 }
1180 
computeIncomingVLVTYPE(const MachineBasicBlock & MBB)1181 void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
1182 
1183   BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1184 
1185   BBInfo.InQueue = false;
1186 
1187   // Start with the previous entry so that we keep the most conservative state
1188   // we have ever found.
1189   VSETVLIInfo InInfo = BBInfo.Pred;
1190   if (MBB.pred_empty()) {
1191     // There are no predecessors, so use the default starting status.
1192     InInfo.setUnknown();
1193   } else {
1194     for (MachineBasicBlock *P : MBB.predecessors())
1195       InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
1196   }
1197 
1198   // If we don't have any valid predecessor value, wait until we do.
1199   if (!InInfo.isValid())
1200     return;
1201 
1202   // If no change, no need to rerun block
1203   if (InInfo == BBInfo.Pred)
1204     return;
1205 
1206   BBInfo.Pred = InInfo;
1207   LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
1208                     << " changed to " << BBInfo.Pred << "\n");
1209 
1210   // Note: It's tempting to cache the state changes here, but due to the
1211   // compatibility checks performed a blocks output state can change based on
1212   // the input state.  To cache, we'd have to add logic for finding
1213   // never-compatible state changes.
1214   VSETVLIInfo TmpStatus;
1215   computeVLVTYPEChanges(MBB, TmpStatus);
1216 
1217   // If the new exit value matches the old exit value, we don't need to revisit
1218   // any blocks.
1219   if (BBInfo.Exit == TmpStatus)
1220     return;
1221 
1222   BBInfo.Exit = TmpStatus;
1223   LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
1224                     << " changed to " << BBInfo.Exit << "\n");
1225 
1226   // Add the successors to the work list so we can propagate the changed exit
1227   // status.
1228   for (MachineBasicBlock *S : MBB.successors())
1229     if (!BlockInfo[S->getNumber()].InQueue) {
1230       BlockInfo[S->getNumber()].InQueue = true;
1231       WorkList.push(S);
1232     }
1233 }
1234 
1235 // If we weren't able to prove a vsetvli was directly unneeded, it might still
1236 // be unneeded if the AVL is a phi node where all incoming values are VL
1237 // outputs from the last VSETVLI in their respective basic blocks.
needVSETVLIPHI(const VSETVLIInfo & Require,const MachineBasicBlock & MBB) const1238 bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
1239                                         const MachineBasicBlock &MBB) const {
1240   if (DisableInsertVSETVLPHIOpt)
1241     return true;
1242 
1243   if (!Require.hasAVLReg())
1244     return true;
1245 
1246   Register AVLReg = Require.getAVLReg();
1247   if (!AVLReg.isVirtual())
1248     return true;
1249 
1250   // We need the AVL to be produce by a PHI node in this basic block.
1251   MachineInstr *PHI = MRI->getVRegDef(AVLReg);
1252   if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
1253     return true;
1254 
1255   for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
1256        PHIOp += 2) {
1257     Register InReg = PHI->getOperand(PHIOp).getReg();
1258     MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
1259     const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
1260     // If the exit from the predecessor has the VTYPE we are looking for
1261     // we might be able to avoid a VSETVLI.
1262     if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
1263       return true;
1264 
1265     // We need the PHI input to the be the output of a VSET(I)VLI.
1266     MachineInstr *DefMI = MRI->getVRegDef(InReg);
1267     if (!DefMI || !isVectorConfigInstr(*DefMI))
1268       return true;
1269 
1270     // We found a VSET(I)VLI make sure it matches the output of the
1271     // predecessor block.
1272     VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
1273     if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
1274         !DefInfo.hasSameVTYPE(PBBInfo.Exit))
1275       return true;
1276   }
1277 
1278   // If all the incoming values to the PHI checked out, we don't need
1279   // to insert a VSETVLI.
1280   return false;
1281 }
1282 
emitVSETVLIs(MachineBasicBlock & MBB)1283 void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
1284   VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
1285   // Track whether the prefix of the block we've scanned is transparent
1286   // (meaning has not yet changed the abstract state).
1287   bool PrefixTransparent = true;
1288   for (MachineInstr &MI : MBB) {
1289     const VSETVLIInfo PrevInfo = CurInfo;
1290     transferBefore(CurInfo, MI);
1291 
1292     // If this is an explicit VSETVLI or VSETIVLI, update our state.
1293     if (isVectorConfigInstr(MI)) {
1294       // Conservatively, mark the VL and VTYPE as live.
1295       assert(MI.getOperand(3).getReg() == RISCV::VL &&
1296              MI.getOperand(4).getReg() == RISCV::VTYPE &&
1297              "Unexpected operands where VL and VTYPE should be");
1298       MI.getOperand(3).setIsDead(false);
1299       MI.getOperand(4).setIsDead(false);
1300       PrefixTransparent = false;
1301     }
1302 
1303     uint64_t TSFlags = MI.getDesc().TSFlags;
1304     if (RISCVII::hasSEWOp(TSFlags)) {
1305       if (PrevInfo != CurInfo) {
1306         // If this is the first implicit state change, and the state change
1307         // requested can be proven to produce the same register contents, we
1308         // can skip emitting the actual state change and continue as if we
1309         // had since we know the GPR result of the implicit state change
1310         // wouldn't be used and VL/VTYPE registers are correct.  Note that
1311         // we *do* need to model the state as if it changed as while the
1312         // register contents are unchanged, the abstract model can change.
1313         if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
1314           insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
1315         PrefixTransparent = false;
1316       }
1317 
1318       if (RISCVII::hasVLOp(TSFlags)) {
1319         MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
1320         if (VLOp.isReg()) {
1321           Register Reg = VLOp.getReg();
1322           MachineInstr *VLOpDef = MRI->getVRegDef(Reg);
1323 
1324           // Erase the AVL operand from the instruction.
1325           VLOp.setReg(RISCV::NoRegister);
1326           VLOp.setIsKill(false);
1327 
1328           // If the AVL was an immediate > 31, then it would have been emitted
1329           // as an ADDI. However, the ADDI might not have been used in the
1330           // vsetvli, or a vsetvli might not have been emitted, so it may be
1331           // dead now.
1332           if (VLOpDef && TII->isAddImmediate(*VLOpDef, Reg) &&
1333               MRI->use_nodbg_empty(Reg))
1334             VLOpDef->eraseFromParent();
1335         }
1336         MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
1337                                                 /*isImp*/ true));
1338       }
1339       MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
1340                                               /*isImp*/ true));
1341     }
1342 
1343     if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
1344         MI.modifiesRegister(RISCV::VTYPE))
1345       PrefixTransparent = false;
1346 
1347     transferAfter(CurInfo, MI);
1348   }
1349 
1350   // If we reach the end of the block and our current info doesn't match the
1351   // expected info, insert a vsetvli to correct.
1352   if (!UseStrictAsserts) {
1353     const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
1354     if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
1355         CurInfo != ExitInfo) {
1356       // Note there's an implicit assumption here that terminators never use
1357       // or modify VL or VTYPE.  Also, fallthrough will return end().
1358       auto InsertPt = MBB.getFirstInstrTerminator();
1359       insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
1360                     CurInfo);
1361       CurInfo = ExitInfo;
1362     }
1363   }
1364 
1365   if (UseStrictAsserts && CurInfo.isValid()) {
1366     const auto &Info = BlockInfo[MBB.getNumber()];
1367     if (CurInfo != Info.Exit) {
1368       LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
1369       LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
1370       LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
1371       LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
1372     }
1373     assert(CurInfo == Info.Exit &&
1374            "InsertVSETVLI dataflow invariant violated");
1375   }
1376 }
1377 
1378 /// Perform simple partial redundancy elimination of the VSETVLI instructions
1379 /// we're about to insert by looking for cases where we can PRE from the
1380 /// beginning of one block to the end of one of its predecessors.  Specifically,
1381 /// this is geared to catch the common case of a fixed length vsetvl in a single
1382 /// block loop when it could execute once in the preheader instead.
doPRE(MachineBasicBlock & MBB)1383 void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
1384   if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
1385     return;
1386 
1387   MachineBasicBlock *UnavailablePred = nullptr;
1388   VSETVLIInfo AvailableInfo;
1389   for (MachineBasicBlock *P : MBB.predecessors()) {
1390     const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
1391     if (PredInfo.isUnknown()) {
1392       if (UnavailablePred)
1393         return;
1394       UnavailablePred = P;
1395     } else if (!AvailableInfo.isValid()) {
1396       AvailableInfo = PredInfo;
1397     } else if (AvailableInfo != PredInfo) {
1398       return;
1399     }
1400   }
1401 
1402   // Unreachable, single pred, or full redundancy. Note that FRE is handled by
1403   // phase 3.
1404   if (!UnavailablePred || !AvailableInfo.isValid())
1405     return;
1406 
1407   // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
1408   // the unavailable pred.
1409   if (AvailableInfo.hasSEWLMULRatioOnly())
1410     return;
1411 
1412   // Critical edge - TODO: consider splitting?
1413   if (UnavailablePred->succ_size() != 1)
1414     return;
1415 
1416   // If the AVL value is a register (other than our VLMAX sentinel),
1417   // we need to prove the value is available at the point we're going
1418   // to insert the vsetvli at.
1419   if (AvailableInfo.hasAVLReg() && RISCV::X0 != AvailableInfo.getAVLReg()) {
1420     MachineInstr *AVLDefMI = MRI->getVRegDef(AvailableInfo.getAVLReg());
1421     if (!AVLDefMI)
1422       return;
1423     // This is an inline dominance check which covers the case of
1424     // UnavailablePred being the preheader of a loop.
1425     if (AVLDefMI->getParent() != UnavailablePred)
1426       return;
1427     for (auto &TermMI : UnavailablePred->terminators())
1428       if (&TermMI == AVLDefMI)
1429         return;
1430   }
1431 
1432   // Model the effect of changing the input state of the block MBB to
1433   // AvailableInfo.  We're looking for two issues here; one legality,
1434   // one profitability.
1435   // 1) If the block doesn't use some of the fields from VL or VTYPE, we
1436   //    may hit the end of the block with a different end state.  We can
1437   //    not make this change without reflowing later blocks as well.
1438   // 2) If we don't actually remove a transition, inserting a vsetvli
1439   //    into the predecessor block would be correct, but unprofitable.
1440   VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
1441   VSETVLIInfo CurInfo = AvailableInfo;
1442   int TransitionsRemoved = 0;
1443   for (const MachineInstr &MI : MBB) {
1444     const VSETVLIInfo LastInfo = CurInfo;
1445     const VSETVLIInfo LastOldInfo = OldInfo;
1446     transferBefore(CurInfo, MI);
1447     transferBefore(OldInfo, MI);
1448     if (CurInfo == LastInfo)
1449       TransitionsRemoved++;
1450     if (LastOldInfo == OldInfo)
1451       TransitionsRemoved--;
1452     transferAfter(CurInfo, MI);
1453     transferAfter(OldInfo, MI);
1454     if (CurInfo == OldInfo)
1455       // Convergence.  All transitions after this must match by construction.
1456       break;
1457   }
1458   if (CurInfo != OldInfo || TransitionsRemoved <= 0)
1459     // Issues 1 and 2 above
1460     return;
1461 
1462   // Finally, update both data flow state and insert the actual vsetvli.
1463   // Doing both keeps the code in sync with the dataflow results, which
1464   // is critical for correctness of phase 3.
1465   auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
1466   LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
1467                     << UnavailablePred->getName() << " with state "
1468                     << AvailableInfo << "\n");
1469   BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
1470   BlockInfo[MBB.getNumber()].Pred = AvailableInfo;
1471 
1472   // Note there's an implicit assumption here that terminators never use
1473   // or modify VL or VTYPE.  Also, fallthrough will return end().
1474   auto InsertPt = UnavailablePred->getFirstInstrTerminator();
1475   insertVSETVLI(*UnavailablePred, InsertPt,
1476                 UnavailablePred->findDebugLoc(InsertPt),
1477                 AvailableInfo, OldExit);
1478 }
1479 
doUnion(DemandedFields & A,DemandedFields B)1480 static void doUnion(DemandedFields &A, DemandedFields B) {
1481   A.VLAny |= B.VLAny;
1482   A.VLZeroness |= B.VLZeroness;
1483   A.SEW = std::max(A.SEW, B.SEW);
1484   A.LMUL |= B.LMUL;
1485   A.SEWLMULRatio |= B.SEWLMULRatio;
1486   A.TailPolicy |= B.TailPolicy;
1487   A.MaskPolicy |= B.MaskPolicy;
1488 }
1489 
1490 // Return true if we can mutate PrevMI to match MI without changing any the
1491 // fields which would be observed.
canMutatePriorConfig(const MachineInstr & PrevMI,const MachineInstr & MI,const DemandedFields & Used,const MachineRegisterInfo & MRI)1492 static bool canMutatePriorConfig(const MachineInstr &PrevMI,
1493                                  const MachineInstr &MI,
1494                                  const DemandedFields &Used,
1495                                  const MachineRegisterInfo &MRI) {
1496   // If the VL values aren't equal, return false if either a) the former is
1497   // demanded, or b) we can't rewrite the former to be the later for
1498   // implementation reasons.
1499   if (!isVLPreservingConfig(MI)) {
1500     if (Used.VLAny)
1501       return false;
1502 
1503     if (Used.VLZeroness) {
1504       if (isVLPreservingConfig(PrevMI))
1505         return false;
1506       if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
1507                                                        MRI))
1508         return false;
1509     }
1510 
1511     auto &AVL = MI.getOperand(1);
1512     auto &PrevAVL = PrevMI.getOperand(1);
1513     assert(MRI.isSSA());
1514 
1515     // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
1516     // For now just check that PrevMI uses the same virtual register.
1517     if (AVL.isReg() && AVL.getReg() != RISCV::X0) {
1518       if (AVL.getReg().isPhysical())
1519         return false;
1520       if (!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg())
1521         return false;
1522     }
1523   }
1524 
1525   if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
1526     return false;
1527 
1528   auto PriorVType = PrevMI.getOperand(2).getImm();
1529   auto VType = MI.getOperand(2).getImm();
1530   return areCompatibleVTYPEs(PriorVType, VType, Used);
1531 }
1532 
doLocalPostpass(MachineBasicBlock & MBB)1533 void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
1534   MachineInstr *NextMI = nullptr;
1535   // We can have arbitrary code in successors, so VL and VTYPE
1536   // must be considered demanded.
1537   DemandedFields Used;
1538   Used.demandVL();
1539   Used.demandVTYPE();
1540   SmallVector<MachineInstr*> ToDelete;
1541   for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
1542 
1543     if (!isVectorConfigInstr(MI)) {
1544       doUnion(Used, getDemanded(MI, MRI, ST));
1545       continue;
1546     }
1547 
1548     Register VRegDef = MI.getOperand(0).getReg();
1549     if (VRegDef != RISCV::X0 &&
1550         !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
1551       Used.demandVL();
1552 
1553     if (NextMI) {
1554       if (!Used.usedVL() && !Used.usedVTYPE()) {
1555         ToDelete.push_back(&MI);
1556         // Leave NextMI unchanged
1557         continue;
1558       } else if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
1559         if (!isVLPreservingConfig(*NextMI)) {
1560           MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
1561           MI.getOperand(0).setIsDead(false);
1562           Register OldVLReg;
1563           if (MI.getOperand(1).isReg())
1564             OldVLReg = MI.getOperand(1).getReg();
1565           if (NextMI->getOperand(1).isImm())
1566             MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
1567           else
1568             MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
1569           if (OldVLReg) {
1570             MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
1571             if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
1572                 MRI->use_nodbg_empty(OldVLReg))
1573               VLOpDef->eraseFromParent();
1574           }
1575           MI.setDesc(NextMI->getDesc());
1576         }
1577         MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
1578         ToDelete.push_back(NextMI);
1579         // fallthrough
1580       }
1581     }
1582     NextMI = &MI;
1583     Used = getDemanded(MI, MRI, ST);
1584   }
1585 
1586   NumRemovedVSETVL += ToDelete.size();
1587   for (auto *MI : ToDelete)
1588     MI->eraseFromParent();
1589 }
1590 
insertReadVL(MachineBasicBlock & MBB)1591 void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
1592   for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
1593     MachineInstr &MI = *I++;
1594     if (RISCV::isFaultFirstLoad(MI)) {
1595       Register VLOutput = MI.getOperand(1).getReg();
1596       if (!MRI->use_nodbg_empty(VLOutput))
1597         BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
1598                 VLOutput);
1599       // We don't use the vl output of the VLEFF/VLSEGFF anymore.
1600       MI.getOperand(1).setReg(RISCV::X0);
1601     }
1602   }
1603 }
1604 
runOnMachineFunction(MachineFunction & MF)1605 bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1606   // Skip if the vector extension is not enabled.
1607   ST = &MF.getSubtarget<RISCVSubtarget>();
1608   if (!ST->hasVInstructions())
1609     return false;
1610 
1611   LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
1612 
1613   TII = ST->getInstrInfo();
1614   MRI = &MF.getRegInfo();
1615 
1616   assert(BlockInfo.empty() && "Expect empty block infos");
1617   BlockInfo.resize(MF.getNumBlockIDs());
1618 
1619   bool HaveVectorOp = false;
1620 
1621   // Phase 1 - determine how VL/VTYPE are affected by the each block.
1622   for (const MachineBasicBlock &MBB : MF) {
1623     VSETVLIInfo TmpStatus;
1624     HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
1625     // Initial exit state is whatever change we found in the block.
1626     BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1627     BBInfo.Exit = TmpStatus;
1628     LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
1629                       << " is " << BBInfo.Exit << "\n");
1630 
1631   }
1632 
1633   // If we didn't find any instructions that need VSETVLI, we're done.
1634   if (!HaveVectorOp) {
1635     BlockInfo.clear();
1636     return false;
1637   }
1638 
1639   // Phase 2 - determine the exit VL/VTYPE from each block. We add all
1640   // blocks to the list here, but will also add any that need to be revisited
1641   // during Phase 2 processing.
1642   for (const MachineBasicBlock &MBB : MF) {
1643     WorkList.push(&MBB);
1644     BlockInfo[MBB.getNumber()].InQueue = true;
1645   }
1646   while (!WorkList.empty()) {
1647     const MachineBasicBlock &MBB = *WorkList.front();
1648     WorkList.pop();
1649     computeIncomingVLVTYPE(MBB);
1650   }
1651 
1652   // Perform partial redundancy elimination of vsetvli transitions.
1653   for (MachineBasicBlock &MBB : MF)
1654     doPRE(MBB);
1655 
1656   // Phase 3 - add any vsetvli instructions needed in the block. Use the
1657   // Phase 2 information to avoid adding vsetvlis before the first vector
1658   // instruction in the block if the VL/VTYPE is satisfied by its
1659   // predecessors.
1660   for (MachineBasicBlock &MBB : MF)
1661     emitVSETVLIs(MBB);
1662 
1663   // Now that all vsetvlis are explicit, go through and do block local
1664   // DSE and peephole based demanded fields based transforms.  Note that
1665   // this *must* be done outside the main dataflow so long as we allow
1666   // any cross block analysis within the dataflow.  We can't have both
1667   // demanded fields based mutation and non-local analysis in the
1668   // dataflow at the same time without introducing inconsistencies.
1669   for (MachineBasicBlock &MBB : MF)
1670     doLocalPostpass(MBB);
1671 
1672   // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
1673   // of VLEFF/VLSEGFF.
1674   for (MachineBasicBlock &MBB : MF)
1675     insertReadVL(MBB);
1676 
1677   BlockInfo.clear();
1678   return HaveVectorOp;
1679 }
1680 
1681 /// Returns an instance of the Insert VSETVLI pass.
createRISCVInsertVSETVLIPass()1682 FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
1683   return new RISCVInsertVSETVLI();
1684 }
1685