1//==- RISCVSchedSiFive7.td - SiFive7 Scheduling Definitions --*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9//===----------------------------------------------------------------------===//
10
/// c is true when mx is the worst-case LMUL among the LMULs in MxList.
/// On the SiFive7, the worst case LMUL is the largest LMUL of the list, as
/// reported by LargestLMUL.
class SiFive7IsWorstCaseMX<string mx, list<string> MxList> {
  bit c = !eq(mx, LargestLMUL<MxList>.r);
}
18
/// c is true when the (mx, sew) pair is the worst case among the LMULs in
/// MxList. On the SiFive7 that means mx is the largest LMUL of the list and
/// sew is the smallest SEW for that LMUL (SmallestSEW, optionally restricted
/// by isF).
class SiFive7IsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
                               bit isF = 0> {
  defvar IsLargestLMUL = !eq(mx, LargestLMUL<MxList>.r);
  defvar IsSmallestSEW = !eq(sew, SmallestSEW<mx, isF>.r);
  bit c = !and(IsLargestLMUL, IsSmallestSEW);
}
28
/// Number of DLEN parts = (LMUL * VLEN) / DLEN.
/// Since DLEN = VLEN / 2, Num DLEN parts = 2 * LMUL.
/// Every fractional LMUL is given a single cycle.
class SiFive7GetCyclesDefault<string mx> {
  int c = !cond(
    !or(!eq(mx, "MF8"), !eq(mx, "MF4"), !eq(mx, "MF2")) : 1,
    !eq(mx, "M1") : 2,
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 16
  );
}
42
/// Cycle table used for narrowing operations. There is deliberately no M8
/// entry: the only user in this file iterates over SchedMxListW.
/// NOTE(review): values for M1..MF2 are twice the default table, presumably
/// because the wide source operand is 2 * LMUL - confirm.
class SiFive7GetCyclesNarrowing<string mx> {
  int c = !cond(
    !or(!eq(mx, "MF8"), !eq(mx, "MF4")) : 1,
    !eq(mx, "MF2") : 2,
    !eq(mx, "M1") : 4,
    !eq(mx, "M2") : 8,
    !eq(mx, "M4") : 16
  );
}
53
/// Cycle table for mask operations: 2 cycles at LMUL=8 and 1 cycle for every
/// other listed LMUL.
class SiFive7GetCyclesVMask<string mx> {
  int c = !cond(
    !eq(mx, "M8") : 2,
    !or(!eq(mx, "M1"), !eq(mx, "M2"), !eq(mx, "M4"),
        !eq(mx, "MF2"), !eq(mx, "MF4"), !eq(mx, "MF8")) : 1
  );
}
65
/// VLDM and VSTM can't read/write more than 2 DLENs of data:
/// 2 DLENs when LMUL=8, 1 DLEN for all other LMULs.
class SiFive7GetMaskLoadStoreCycles<string mx> {
  int c = !if(!eq(mx, "M8"), 2, 1);
}
74
// Cycles for nf=2 segmented loads and stores are calculated using the
// formula (2 * VLEN * LMUL) / DLEN = 4 * LMUL. The table bottoms out at one
// cycle for MF4 and MF8.
class SiFive7GetCyclesSegmentedSeg2<string mx> {
  int c = !cond(
    !or(!eq(mx, "MF8"), !eq(mx, "MF4")) : 1,
    !eq(mx, "MF2") : 2,
    !eq(mx, "M1") : 4,
    !eq(mx, "M2") : 8,
    !eq(mx, "M4") : 16,
    !eq(mx, "M8") : 32
  );
}
88
// Cycles for segmented loads and stores are calculated using the
// formula vl * ceil((SEW * nf) / DLEN), where SEW * nf is the segment size.
// vl is approximated by its upper bound (VLEN * LMUL) / SEW.
class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
  defvar VLEN = 512;
  defvar DLEN = 256;
  // Size of the register group in bits: VLEN * LMUL.
  defvar GroupBits = !cond(
    !eq(mx, "M1") : VLEN,
    !eq(mx, "M2") : !mul(VLEN, 2),
    !eq(mx, "M4") : !mul(VLEN, 4),
    !eq(mx, "M8") : !mul(VLEN, 8),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF8") : !div(VLEN, 8)
  );
  defvar VLUpperBound = !div(GroupBits, sew);
  // ceil(SegmentBits / DLEN), computed as (SegmentBits + DLEN - 1) / DLEN.
  defvar SegmentBits = !mul(sew, nf);
  defvar BeatsPerSegment = !div(!sub(!add(SegmentBits, DLEN), 1), DLEN);
  int c = !mul(VLUpperBound, BeatsPerSegment);
}
109
/// One cycle per element: c = (512 / SEW) * LMUL.
class SiFive7GetCyclesOnePerElement<string mx, int sew> {
  // FIXME: On SiFive7, VLEN is 512. Although a user can request the compiler
  // to use a different VLEN, this model will not make scheduling decisions
  // based on the user specified VLEN.
  //
  // Despite its name, the VLEN field below holds the number of SEW-sized
  // elements in one 512-bit register, i.e. 512 / SEW.
  //
  // Note: c >= 1 for the SEWs used in this file (8..64), since the smallest
  // element count is 512 / 64 = 8 and the largest division applied to it is
  // the division by 8 in the MF8 case. Therefore, there is no need to ceil
  // the result.
  int VLEN = !div(512, sew);
  int c = !cond(
    !eq(mx, "MF8") : !div(VLEN, 8),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "M1")  : VLEN,
    !eq(mx, "M2")  : !mul(VLEN, 2),
    !eq(mx, "M4")  : !mul(VLEN, 4),
    !eq(mx, "M8")  : !mul(VLEN, 8)
  );
}
129
/// Per-SEW multiplier applied to division (and, going by the name, square
/// root) cycle counts.
class SiFive7GetDivOrSqrtFactor<int sew> {
  int c = !cond(
    // TODO: Add SchedSEWSetFP upstream and remove the SEW=8 case.
    !or(!eq(sew, 8), !eq(sew, 16)) : 15,
    !eq(sew, 32) : 28,
    !eq(sew, 64) : 57
  );
}
139
/// Cycles for reductions take approximately VL*SEW/DLEN + 5(4 + log(DLEN/SEW))
/// cycles.
class SiFive7GetReductionCycles<string mx, int sew> {
  defvar VLEN = 512;
  defvar DLEN = !div(VLEN, 2);
  // First term: VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
  // VLUpperBound=(VLEN*LMUL)/SEW. Fractional LMULs are given 1.
  defvar TwoTimesLMUL = !cond(
    !or(!eq(mx, "MF8"), !eq(mx, "MF4"), !eq(mx, "MF2")) : 1,
    !eq(mx, "M1") : 2,
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 16
  );
  // Second term: 5 * (4 + log2(DLEN / SEW)).
  defvar LogElementsPerDLEN = !logtwo(!div(DLEN, sew));
  defvar TreeCycles = !mul(5, !add(4, LogElementsPerDLEN));
  int c = !add(TwoTimesLMUL, TreeCycles);
}
161
/// Cycles for ordered reductions take approximately 6*VL cycles, with VL
/// taken as its upper bound (VLEN * LMUL) / SEW.
class SiFive7GetOrderedReductionCycles<string mx, int sew> {
  defvar VLEN = 512;
  // Size of the register group in bits: VLEN * LMUL.
  defvar GroupBits = !cond(
    !eq(mx, "M1") : VLEN,
    !eq(mx, "M2") : !mul(VLEN, 2),
    !eq(mx, "M4") : !mul(VLEN, 4),
    !eq(mx, "M8") : !mul(VLEN, 8),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF8") : !div(VLEN, 8)
  );
  defvar VLUpperBound = !div(GroupBits, sew);
  int c = !mul(6, VLUpperBound);
}
177
/// ReadAdvance of `cycles` (2 unless overridden) for `read`, valid when the
/// value is produced by any of the scalar integer writes listed below. The
/// sub-lists are concatenated in their original order.
class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
    : ReadAdvance<read, cycles, !listconcat(
        // ALU and shifts
        [WriteIALU, WriteIALU32,
         WriteShiftImm, WriteShiftImm32,
         WriteShiftReg, WriteShiftReg32],
        // Bit manipulation
        [WriteSHXADD, WriteSHXADD32,
         WriteRotateImm, WriteRotateImm32,
         WriteRotateReg, WriteRotateReg32,
         WriteSingleBit, WriteSingleBitImm,
         WriteBEXT, WriteBEXTI,
         WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
         WriteCPOP, WriteCPOP32,
         WriteREV8, WriteORCB],
        // Short forward branch
        [WriteSFB],
        // Multiply and divide
        [WriteIMul, WriteIMul32,
         WriteIDiv, WriteIDiv32],
        // Integer loads
        [WriteLDB, WriteLDH, WriteLDW, WriteLDD])>;
193
// SiFive7 machine model for scheduling and other instruction cost heuristics.
def SiFive7Model : SchedMachineModel {
  // Explicitly set to zero since SiFive7 is in-order.
  let MicroOpBufferSize = 0;
  // 2 micro-ops are dispatched per cycle.
  let IssueWidth = 2;
  let LoadLatency = 3;
  let MispredictPenalty = 3;
  // The model is not declared complete, and the extensions below are marked
  // as unsupported by it.
  let CompleteModel = 0;
  let UnsupportedFeatures = [
    HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
    HasStdExtZcmt,
    HasStdExtZknd, HasStdExtZkne, HasStdExtZknh,
    HasStdExtZksed, HasStdExtZksh,
    HasStdExtZkr
  ];
}
206
207// The SiFive7 microarchitecture has three pipelines: A, B, V.
208// Pipe A can handle memory, integer alu and vector operations.
209// Pipe B can handle integer alu, control flow, integer multiply and divide,
210// and floating point computation.
211// The V pipeline is modeled by the VCQ, VA, VL, and VS resources.
212let SchedModel = SiFive7Model in {
// Processor resources. Every resource is a single unit with BufferSize = 0.
def SiFive7PipeA       : ProcResource<1> { let BufferSize = 0; }
def SiFive7PipeB       : ProcResource<1> { let BufferSize = 0; }
// Int Division
def SiFive7IDiv        : ProcResource<1> { let BufferSize = 0; }
// FP Division/Sqrt
def SiFive7FDiv        : ProcResource<1> { let BufferSize = 0; }
// Arithmetic sequencer
def SiFive7VA          : ProcResource<1> { let BufferSize = 0; }
// Load sequencer
def SiFive7VL          : ProcResource<1> { let BufferSize = 0; }
// Store sequencer
def SiFive7VS          : ProcResource<1> { let BufferSize = 0; }
// Vector Command Queue.
// The VCQ accepts instructions from the A Pipe and holds them until the
// vector unit is ready to dequeue them. The unit dequeues up to one instruction
// per cycle, in order, as soon as the sequencer for that type of instruction is
// available. This resource is meant to be used for 1 cycle by all vector
// instructions, to model that only one vector instruction may be dequeued at a
// time. The actual dequeueing into the sequencer is modeled by the VA, VL, and
// VS sequencer resources above. Each of them will only accept a single
// instruction at a time and remain busy for the number of cycles associated
// with that instruction.
def SiFive7VCQ         : ProcResource<1> { let BufferSize = 0; }
232
// Resource group made of both scalar pipes. Writes mapped to it below (for
// example WriteIALU) name the group rather than a specific pipe.
def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;
234
// Branching: WriteJmp, WriteJal and WriteJalr use pipe B with latency 3.
foreach BranchWrite = [WriteJmp, WriteJal, WriteJalr] in
def : WriteRes<BranchWrite, [SiFive7PipeB]> { let Latency = 3; }
241
// Short forward branch: two micro-ops that use both pipe A and pipe B.
let Latency = 3, NumMicroOps = 2 in
def : WriteRes<WriteSFB, [SiFive7PipeA, SiFive7PipeB]>;
247
// Integer arithmetic and logic: mapped to the A/B pipe group, latency 3.
foreach ALUWrite = [WriteIALU, WriteIALU32,
                    WriteShiftImm, WriteShiftImm32,
                    WriteShiftReg, WriteShiftReg32] in
def : WriteRes<ALUWrite, [SiFive7PipeAB]> { let Latency = 3; }
257
// Integer multiplication: pipe B only, latency 3.
foreach MulWrite = [WriteIMul, WriteIMul32] in
def : WriteRes<MulWrite, [SiFive7PipeB]> { let Latency = 3; }
263
// Integer division: pipe B is released after 1 cycle while the SiFive7IDiv
// resource is released one cycle short of the full latency.
let Latency = 66, ReleaseAtCycles = [1, 65] in
def : WriteRes<WriteIDiv, [SiFive7PipeB, SiFive7IDiv]>;
let Latency = 34, ReleaseAtCycles = [1, 33] in
def : WriteRes<WriteIDiv32, [SiFive7PipeB, SiFive7IDiv]>;
273
// Bitmanip (all latency 3)
// Rotates are in the late-B ALU.
foreach RotateWrite = [WriteRotateImm, WriteRotateImm32,
                       WriteRotateReg, WriteRotateReg32] in
def : WriteRes<RotateWrite, [SiFive7PipeB]> { let Latency = 3; }

// clz[w]/ctz[w] are in the late-B ALU.
foreach CountZeroWrite = [WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32] in
def : WriteRes<CountZeroWrite, [SiFive7PipeB]> { let Latency = 3; }

// cpop[w] look exactly like multiply.
foreach PopCountWrite = [WriteCPOP, WriteCPOP32] in
def : WriteRes<PopCountWrite, [SiFive7PipeB]> { let Latency = 3; }

// orc.b is in the late-B ALU.
def : WriteRes<WriteORCB, [SiFive7PipeB]> { let Latency = 3; }

// rev8 is in the late-A and late-B ALUs.
def : WriteRes<WriteREV8, [SiFive7PipeAB]> { let Latency = 3; }

// shNadd[.uw] is on the early-B and late-B ALUs.
foreach ShAddWrite = [WriteSHXADD, WriteSHXADD32] in
def : WriteRes<ShAddWrite, [SiFive7PipeB]> { let Latency = 3; }
302
// Single-bit instructions (latency 3)
// BEXT[I] instruction is available on all ALUs and the other instructions
// are only available on the SiFive7B pipe.
foreach SingleBitWrite = [WriteSingleBit, WriteSingleBitImm] in
def : WriteRes<SingleBitWrite, [SiFive7PipeB]> { let Latency = 3; }
foreach BextWrite = [WriteBEXT, WriteBEXTI] in
def : WriteRes<BextWrite, [SiFive7PipeAB]> { let Latency = 3; }
312
// Memory: every scalar load and store uses pipe A.
// Stores do not override the latency that WriteRes provides.
foreach StoreWrite = [WriteSTB, WriteSTH, WriteSTW, WriteSTD,
                      WriteFST16, WriteFST32, WriteFST64] in
def : WriteRes<StoreWrite, [SiFive7PipeA]>;

// Integer loads: latency 3.
foreach LoadWrite = [WriteLDB, WriteLDH, WriteLDW, WriteLDD] in
def : WriteRes<LoadWrite, [SiFive7PipeA]> { let Latency = 3; }

// Floating-point loads: latency 2.
foreach FPLoadWrite = [WriteFLD16, WriteFLD32, WriteFLD64] in
def : WriteRes<FPLoadWrite, [SiFive7PipeA]> { let Latency = 2; }
334
// Atomic memory: all on pipe A. The two atomic store writes do not override
// the latency; the remaining atomic writes use latency 3.
foreach AtomicStoreWrite = [WriteAtomicSTW, WriteAtomicSTD] in
def : WriteRes<AtomicStoreWrite, [SiFive7PipeA]>;

foreach AtomicWrite = [WriteAtomicW, WriteAtomicD,
                       WriteAtomicLDW, WriteAtomicLDD] in
def : WriteRes<AtomicWrite, [SiFive7PipeA]> { let Latency = 3; }
345
// Half precision.
// Add, multiply and fused multiply-add: latency 5.
foreach FPWrite = [WriteFAdd16, WriteFMul16, WriteFMA16] in
def : WriteRes<FPWrite, [SiFive7PipeB]> { let Latency = 5; }
// Sign injection and min/max: latency 3.
foreach FPWrite = [WriteFSGNJ16, WriteFMinMax16] in
def : WriteRes<FPWrite, [SiFive7PipeB]> { let Latency = 3; }

// Divide and square root also hold the SiFive7FDiv resource.
foreach FPWrite = [WriteFDiv16, WriteFSqrt16] in
def : WriteRes<FPWrite, [SiFive7PipeB, SiFive7FDiv]> {
  let Latency = 14;
  let ReleaseAtCycles = [1, 13];
}
361
// Single precision.
// Add, multiply and fused multiply-add: latency 5.
foreach FPWrite = [WriteFAdd32, WriteFMul32, WriteFMA32] in
def : WriteRes<FPWrite, [SiFive7PipeB]> { let Latency = 5; }
// Sign injection and min/max: latency 3.
foreach FPWrite = [WriteFSGNJ32, WriteFMinMax32] in
def : WriteRes<FPWrite, [SiFive7PipeB]> { let Latency = 3; }

// Divide and square root also hold the SiFive7FDiv resource.
foreach FPWrite = [WriteFDiv32, WriteFSqrt32] in
def : WriteRes<FPWrite, [SiFive7PipeB, SiFive7FDiv]> {
  let Latency = 27;
  let ReleaseAtCycles = [1, 26];
}
377
// Double precision
// Add, multiply and fused multiply-add: latency 7.
foreach FPWrite = [WriteFAdd64, WriteFMul64, WriteFMA64] in
def : WriteRes<FPWrite, [SiFive7PipeB]> { let Latency = 7; }
// Sign injection and min/max: latency 3.
foreach FPWrite = [WriteFSGNJ64, WriteFMinMax64] in
def : WriteRes<FPWrite, [SiFive7PipeB]> { let Latency = 3; }

// Divide and square root also hold the SiFive7FDiv resource.
foreach FPWrite = [WriteFDiv64, WriteFSqrt64] in
def : WriteRes<FPWrite, [SiFive7PipeB, SiFive7FDiv]> {
  let Latency = 56;
  let ReleaseAtCycles = [1, 55];
}
393
// Conversions: every write below uses pipe B with latency 3.
foreach CvtWrite = [WriteFCvtI32ToF16, WriteFCvtI32ToF32, WriteFCvtI32ToF64,
                    WriteFCvtI64ToF16, WriteFCvtI64ToF32, WriteFCvtI64ToF64,
                    WriteFCvtF16ToI32, WriteFCvtF16ToI64,
                    WriteFCvtF16ToF32, WriteFCvtF16ToF64,
                    WriteFCvtF32ToI32, WriteFCvtF32ToI64,
                    WriteFCvtF32ToF16, WriteFCvtF32ToF64,
                    WriteFCvtF64ToI32, WriteFCvtF64ToI64,
                    WriteFCvtF64ToF16, WriteFCvtF64ToF32] in
def : WriteRes<CvtWrite, [SiFive7PipeB]> { let Latency = 3; }

// Classify, compare and FP<->integer register moves share the same mapping.
foreach MiscFPWrite = [WriteFClass16, WriteFClass32, WriteFClass64,
                       WriteFCmp16, WriteFCmp32, WriteFCmp64,
                       WriteFMovI16ToF16, WriteFMovF16ToI16,
                       WriteFMovI32ToF32, WriteFMovF32ToI32,
                       WriteFMovI64ToF64, WriteFMovF64ToI64] in
def : WriteRes<MiscFPWrite, [SiFive7PipeB]> { let Latency = 3; }
428
// 6. Configuration-Setting Instructions
// The vset* writes use pipe A with latency 3.
foreach VSetWrite = [WriteVSETVLI, WriteVSETIVLI, WriteVSETVL] in
def : WriteRes<VSetWrite, [SiFive7PipeA]> { let Latency = 3; }
435
// 7. Vector Loads and Stores
// Unit-stride loads and stores can operate at the full bandwidth of the memory
// pipe. The memory pipe is DLEN bits wide on x280.
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  // The first resource (VCQ) spans cycles [0, 1); the second (a sequencer)
  // spans [1, 1 + Cycles).
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    let Latency = 4 in
    foreach Load = ["WriteVLDE", "WriteVLDFF"] in
    defm "" : LMULWriteResMX<Load, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
    let Latency = 1 in
    defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
  }
}
449
// Mask loads and stores (WriteVLDM / WriteVSTM) use the dedicated mask
// load/store cycle table instead of the default per-LMUL one.
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetMaskLoadStoreCycles<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    let Latency = 4 in
    defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
    let Latency = 1 in
    defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
  }
}
458
459// Strided loads and stores operate at one element per cycle and should be
460// scheduled accordingly. Indexed loads and stores operate at one element per
461// cycle, and they stall the machine until all addresses have been generated,
462// so they cannot be scheduled. Indexed and strided loads and stores have LMUL
463// specific suffixes, but since SEW is already encoded in the name of the
464// resource, we do not need to use LMULSEWXXX constructors. However, we do
465// use the SEW from the name to determine the number of Cycles.
466
// This predicate is true when the rs2 operand of vlse or vsse is x0, false
// otherwise. It checks operand index 3 against X0 and is used below to pick
// between the two variants of each WriteVLDS* resource.
def VLDSX0Pred : MCSchedPredicate<CheckRegOperand<3, X0>>;
470
// EEW=8 strided and indexed loads/stores.
foreach mx = SchedMxList in {
  defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar Acquire = [0, 1];
  defvar Release = [1, !add(1, Cycles)];
  defvar PerElementLatency = !add(3, Cycles);
  // NOTE(review): the first latency/acquire/release triple presumably applies
  // when VLDSX0Pred holds and the second otherwise - confirm against the
  // definition of LMULWriteResMXVariant.
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
                                       4, Acquire, [1, !add(1, VLDSX0Cycles)], PerElementLatency,
                                       Acquire, Release, mx, IsWorstCase>;
  let AcquireAtCycles = Acquire, ReleaseAtCycles = Release in {
    let Latency = PerElementLatency in
    foreach Load = ["WriteVLDUX8", "WriteVLDOX8"] in
    defm "" : LMULWriteResMX<Load, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
    let Latency = 1 in
    foreach Store = ["WriteVSTS8", "WriteVSTUX8", "WriteVSTOX8"] in
    defm "" : LMULWriteResMX<Store, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
  }
}
488// TODO: The MxLists need to be filtered by EEW. We only need to support
489// LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8
490// since LMUL >= 16/64.
// EEW=16 strided and indexed loads/stores (no MF8).
foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
  defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar Acquire = [0, 1];
  defvar Release = [1, !add(1, Cycles)];
  defvar PerElementLatency = !add(3, Cycles);
  // NOTE(review): the first latency/acquire/release triple presumably applies
  // when VLDSX0Pred holds and the second otherwise - confirm against the
  // definition of LMULWriteResMXVariant.
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
                                       4, Acquire, [1, !add(1, VLDSX0Cycles)], PerElementLatency,
                                       Acquire, Release, mx, IsWorstCase>;
  let AcquireAtCycles = Acquire, ReleaseAtCycles = Release in {
    let Latency = PerElementLatency in
    foreach Load = ["WriteVLDUX16", "WriteVLDOX16"] in
    defm "" : LMULWriteResMX<Load, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
    let Latency = 1 in
    foreach Store = ["WriteVSTS16", "WriteVSTUX16", "WriteVSTOX16"] in
    defm "" : LMULWriteResMX<Store, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
  }
}
// EEW=32 strided and indexed loads/stores (no MF8 or MF4).
foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
  defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar Acquire = [0, 1];
  defvar Release = [1, !add(1, Cycles)];
  defvar PerElementLatency = !add(3, Cycles);
  // NOTE(review): the first latency/acquire/release triple presumably applies
  // when VLDSX0Pred holds and the second otherwise - confirm against the
  // definition of LMULWriteResMXVariant.
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
                                       4, Acquire, [1, !add(1, VLDSX0Cycles)], PerElementLatency,
                                       Acquire, Release, mx, IsWorstCase>;
  let AcquireAtCycles = Acquire, ReleaseAtCycles = Release in {
    let Latency = PerElementLatency in
    foreach Load = ["WriteVLDUX32", "WriteVLDOX32"] in
    defm "" : LMULWriteResMX<Load, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
    let Latency = 1 in
    foreach Store = ["WriteVSTS32", "WriteVSTUX32", "WriteVSTOX32"] in
    defm "" : LMULWriteResMX<Store, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
  }
}
// EEW=64 strided and indexed loads/stores (integral LMULs only).
foreach mx = ["M1", "M2", "M4", "M8"] in {
  defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar Acquire = [0, 1];
  defvar Release = [1, !add(1, Cycles)];
  defvar PerElementLatency = !add(3, Cycles);
  // NOTE(review): the first latency/acquire/release triple presumably applies
  // when VLDSX0Pred holds and the second otherwise - confirm against the
  // definition of LMULWriteResMXVariant.
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
                                       4, Acquire, [1, !add(1, VLDSX0Cycles)], PerElementLatency,
                                       Acquire, Release, mx, IsWorstCase>;
  let AcquireAtCycles = Acquire, ReleaseAtCycles = Release in {
    let Latency = PerElementLatency in
    foreach Load = ["WriteVLDUX64", "WriteVLDOX64"] in
    defm "" : LMULWriteResMX<Load, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
    let Latency = 1 in
    foreach Store = ["WriteVSTS64", "WriteVSTUX64", "WriteVSTOX64"] in
    defm "" : LMULWriteResMX<Store, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
  }
}
542
// VLD*R is LMUL aware: the load sequencer is released at cycle 1 + 2 * N for
// the N-register form.
let Latency = 4, AcquireAtCycles = [0, 1] in {
  let ReleaseAtCycles = [1, 3] in
  def : WriteRes<WriteVLD1R, [SiFive7VCQ, SiFive7VL]>;
  let ReleaseAtCycles = [1, 5] in
  def : WriteRes<WriteVLD2R, [SiFive7VCQ, SiFive7VL]>;
  let ReleaseAtCycles = [1, 9] in
  def : WriteRes<WriteVLD4R, [SiFive7VCQ, SiFive7VL]>;
  let ReleaseAtCycles = [1, 17] in
  def : WriteRes<WriteVLD8R, [SiFive7VCQ, SiFive7VL]>;
}
// VST*R is LMUL aware: same occupancy on the store sequencer, latency 1.
let Latency = 1, AcquireAtCycles = [0, 1] in {
  let ReleaseAtCycles = [1, 3] in
  def : WriteRes<WriteVST1R, [SiFive7VCQ, SiFive7VS]>;
  let ReleaseAtCycles = [1, 5] in
  def : WriteRes<WriteVST2R, [SiFive7VCQ, SiFive7VS]>;
  let ReleaseAtCycles = [1, 9] in
  def : WriteRes<WriteVST4R, [SiFive7VCQ, SiFive7VS]>;
  let ReleaseAtCycles = [1, 17] in
  def : WriteRes<WriteVST8R, [SiFive7VCQ, SiFive7VS]>;
}
561
// Segmented Loads and Stores
// Unit-stride segmented loads and stores are effectively converted into strided
// segment loads and stores. Strided segment loads and stores operate at up to
// one segment per cycle if the segment fits within one aligned memory beat.
// Indexed segment loads and stores operate at the same rate as strided ones,
// but they stall the machine until all addresses have been generated.
foreach mx = SchedMxList in {
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  foreach eew = [8, 16, 32, 64] in {
    // nf=2 has its own cycle table.
    defvar Seg2Cycles = SiFive7GetCyclesSegmentedSeg2<mx>.c;
    let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Seg2Cycles)] in {
      // Does not chain so set latency high
      let Latency = !add(3, Seg2Cycles) in
      foreach Load = ["WriteVLSEG", "WriteVLSEGFF"] in
      defm "" : LMULWriteResMX<Load # "2e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
      let Latency = 1 in
      defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    }
    // nf=3..8 use the general segmented formula.
    foreach nf=3-8 in {
      defvar SegCycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
      let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SegCycles)] in {
        // Does not chain so set latency high
        let Latency = !add(3, SegCycles) in
        foreach Load = ["WriteVLSEG", "WriteVLSEGFF"] in
        defm "" : LMULWriteResMX<Load # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
        let Latency = 1 in
        defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
      }
    }
  }
}
// Strided and indexed segment loads/stores for nf=2..8 all use the general
// segmented cycle formula.
foreach mx = SchedMxList in {
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  foreach nf=2-8 in {
    foreach eew = [8, 16, 32, 64] in {
      defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
      defvar Suffix = nf # "e" # eew;
      let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
        // Does not chain so set latency high
        let Latency = !add(3, Cycles) in
        foreach Load = ["WriteVLSSEG", "WriteVLUXSEG", "WriteVLOXSEG"] in
        defm "" : LMULWriteResMX<Load # Suffix, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
        let Latency = 1 in
        foreach Store = ["WriteVSSSEG", "WriteVSUXSEG", "WriteVSOXSEG"] in
        defm "" : LMULWriteResMX<Store # Suffix, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
      }
    }
  }
}
611
// 11. Vector Integer Arithmetic Instructions
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    let Latency = 4 in
    foreach Op = ["WriteVIALUV", "WriteVIALUX", "WriteVIALUI",
                  "WriteVICALUV", "WriteVICALUX", "WriteVICALUI",
                  "WriteVShiftV", "WriteVShiftX", "WriteVShiftI",
                  "WriteVIMinMaxV", "WriteVIMinMaxX",
                  "WriteVIMulV", "WriteVIMulX",
                  "WriteVIMulAddV", "WriteVIMulAddX",
                  "WriteVIMergeV", "WriteVIMergeX", "WriteVIMergeI",
                  "WriteVIMovV", "WriteVIMovX", "WriteVIMovI"] in
    defm "" : LMULWriteResMX<Op, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    // Mask results can't chain.
    let Latency = !add(Cycles, 3) in
    foreach Cmp = ["WriteVICmpV", "WriteVICmpX", "WriteVICmpI"] in
    defm "" : LMULWriteResMX<Cmp, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
// WriteVExtV: default per-LMUL occupancy on the arithmetic sequencer,
// latency 4.
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
  defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
// Integer divide: Cycles = DivOrSqrtFactor(sew) * (one-per-element cycles / 4).
// The same value is used as the latency and as the sequencer occupancy.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar ElementCyclesDiv4 = !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4);
    defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c, ElementCyclesDiv4);
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
    foreach Div = ["WriteVIDivV", "WriteVIDivX"] in
    defm "" : LMULSEWWriteResMXSEW<Div, [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
  }
}
664
// Widening: iterates over SchedMxListW, default cycle table, latency 8.
foreach mx = SchedMxListW in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
  foreach Op = ["WriteVIWALUV", "WriteVIWALUX", "WriteVIWALUI",
                "WriteVIWMulV", "WriteVIWMulX",
                "WriteVIWMulAddV", "WriteVIWMulAddX"] in
  defm "" : LMULWriteResMX<Op, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
679// Narrowing
// Narrowing integer shifts: latency 8; SiFive7VA occupancy comes from the
// narrowing cycle table (SiFive7GetCyclesNarrowing) rather than the default.
foreach mx = SchedMxListW in {
  defvar NarrowCycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NarrowCycles)] in {
    foreach wr = ["WriteVNShiftV", "WriteVNShiftX", "WriteVNShiftI"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
689
690// 12. Vector Fixed-Point Arithmetic Instructions
// Fixed-point saturating/averaging ALU, saturating multiply and scaling
// shift writes: latency 8 with the default per-LMUL SiFive7VA occupancy.
foreach mx = SchedMxList in {
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VACycles)] in {
    foreach wr = ["WriteVSALUV", "WriteVSALUX", "WriteVSALUI",
                  "WriteVAALUV", "WriteVAALUX",
                  "WriteVSMulV", "WriteVSMulX",
                  "WriteVSShiftV", "WriteVSShiftX", "WriteVSShiftI"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
707// Narrowing
// Narrowing fixed-point clip: latency 8; SiFive7VA occupancy taken from the
// narrowing cycle table.
foreach mx = SchedMxListW in {
  defvar NarrowCycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NarrowCycles)] in {
    foreach wr = ["WriteVNClipV", "WriteVNClipX", "WriteVNClipI"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
717
718// 13. Vector Floating-Point Instructions
// Single-width floating-point writes, split into three latency groups that
// all share the same resource usage (SiFive7VCQ for cycle 0..1, then
// SiFive7VA until cycle 1 + the default per-LMUL cycle count).
foreach mx = SchedMxList in {
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;

  // Latency 8: arithmetic, multiply(-add), reciprocal estimates, conversions.
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VACycles)] in {
    foreach wr = ["WriteVFALUV", "WriteVFALUF",
                  "WriteVFMulV", "WriteVFMulF",
                  "WriteVFMulAddV", "WriteVFMulAddF",
                  "WriteVFRecpV",
                  "WriteVFCvtIToFV", "WriteVFCvtFToIV"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }

  // Latency 4: sign injection, min/max, classify, merge and move.
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VACycles)] in {
    foreach wr = ["WriteVFSgnjV", "WriteVFSgnjF",
                  "WriteVFMinMaxV", "WriteVFMinMaxF",
                  "WriteVFClassV", "WriteVFMergeV", "WriteVFMovV"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }

  // Mask results can't chain, so the compare latency grows with the
  // occupancy: Cycles + 3.
  let Latency = !add(VACycles, 3),
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VACycles)] in {
    foreach wr = ["WriteVFCmpV", "WriteVFCmpF"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
// Floating-point square root and divide, per (LMUL, SEW) pair drawn from the
// FP lists. Latency and SiFive7VA occupancy are the same value: the
// div/sqrt factor for the SEW times a quarter of the one-per-element count.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar DivSqrtCycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
                                !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    let Latency = DivSqrtCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, DivSqrtCycles)] in {
      foreach wr = ["WriteVFSqrtV", "WriteVFDivV", "WriteVFDivF"] in
        defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VCQ, SiFive7VA], mx, sew, WorstCase>;
    }
  }
}
760
761// Widening
// Widening integer-to-FP conversion. Iterates SchedMxListW (not the FW list
// used by the other FP widening writes); latency 8, default occupancy.
foreach mx = SchedMxListW in {
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VACycles)] in
  defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
}
// Widening FP ALU, multiply, multiply-add and FP-source conversions over
// SchedMxListFW: latency 8 with the default per-LMUL SiFive7VA occupancy.
foreach mx = SchedMxListFW in {
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VACycles)] in {
    foreach wr = ["WriteVFWALUV", "WriteVFWMulV", "WriteVFWMulAddV",
                  "WriteVFWCvtFToIV", "WriteVFWCvtFToFV",
                  "WriteVFWMulAddF", "WriteVFWMulF", "WriteVFWALUF"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
783// Narrowing
// Narrowing FP-to-integer conversion over SchedMxListW: latency 8, with
// SiFive7VA occupancy from the narrowing cycle table.
foreach mx = SchedMxListW in {
  defvar NarrowCycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NarrowCycles)] in
  defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
}
// Narrowing conversions that produce an FP result, over SchedMxListFW:
// latency 8, with SiFive7VA occupancy from the narrowing cycle table.
foreach mx = SchedMxListFW in {
  defvar NarrowCycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NarrowCycles)] in {
    foreach wr = ["WriteVFNCvtIToFV", "WriteVFNCvtFToFV"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
799
800// 14. Vector Reduction Operations
// Single-width integer reductions, per (LMUL, SEW). The reduction cycle count
// from SiFive7GetReductionCycles is used as both latency and SiFive7VA
// occupancy.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = RedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, RedCycles)] in {
      foreach wr = ["WriteVIRedV_From", "WriteVIRedMinMaxV_From"] in
        defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VCQ, SiFive7VA],
                                       mx, sew, WorstCase>;
    }
  }
}
813
// Widening integer reductions, per (LMUL, SEW) from the widening-reduction
// lists. Latency and SiFive7VA occupancy both equal the reduction cycles.
foreach mx = SchedMxListWRed in {
  foreach sew = SchedSEWSet<mx, 0, 1>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
    let Latency = RedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, RedCycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VCQ, SiFive7VA],
                                     mx, sew, WorstCase>;
    }
  }
}
823
// Single-width FP reductions, per (LMUL, SEW) from the FP lists.
// Unordered sum and min/max use SiFive7GetReductionCycles; the ordered sum
// uses SiFive7GetOrderedReductionCycles. In both cases the cycle count is
// the latency as well as the SiFive7VA occupancy.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, 1>.val in {
    defvar UnorderedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    let Latency = UnorderedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, UnorderedCycles)] in {
      foreach wr = ["WriteVFRedV_From", "WriteVFRedMinMaxV_From"] in
        defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VCQ, SiFive7VA],
                                       mx, sew, WorstCase>;
    }
    defvar OrderedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
    let Latency = OrderedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, OrderedCycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VCQ, SiFive7VA],
                                     mx, sew, WorstCase>;
    }
  }
}
840
// Widening FP reductions, per (LMUL, SEW) from the FP widening-reduction
// lists. The unordered form uses SiFive7GetReductionCycles and the ordered
// form SiFive7GetOrderedReductionCycles, each as latency and VA occupancy.
foreach mx = SchedMxListFWRed in {
  foreach sew = SchedSEWSet<mx, 1, 1>.val in {
    defvar UnorderedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
    let Latency = UnorderedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, UnorderedCycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VCQ, SiFive7VA],
                                     mx, sew, WorstCase>;
    }
    defvar OrderedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
    let Latency = OrderedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, OrderedCycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VCQ, SiFive7VA],
                                     mx, sew, WorstCase>;
    }
  }
}
854
855// 15. Vector Mask Instructions
// Mask-register logical, population count, find-first-set and set-before/
// including/only-first writes: latency 4, with SiFive7VA occupancy taken from
// the mask cycle table (SiFive7GetCyclesVMask).
foreach mx = SchedMxList in {
  defvar MaskCycles = SiFive7GetCyclesVMask<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, MaskCycles)] in {
    foreach wr = ["WriteVMALUV", "WriteVMPopV", "WriteVMFFSV", "WriteVMSFSV"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
// Iota and element-index writes: latency 4, but unlike the other mask
// instructions they use the default per-LMUL SiFive7VA occupancy.
foreach mx = SchedMxList in {
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VACycles)] in {
    foreach wr = ["WriteVMIotV", "WriteVMIdxV"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
874
875// 16. Vector Permutation Instructions
// Scalar <-> vector element moves (integer and FP). These are not LMUL
// dependent: latency 4, SiFive7VCQ for cycle 0..1 and SiFive7VA for a single
// cycle (released at 1 + 1).
foreach wr = [WriteVIMovVX, WriteVIMovXV, WriteVFMovVF, WriteVFMovFV] in {
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, 1)] in
  def : WriteRes<wr, [SiFive7VCQ, SiFive7VA]>;
}
// Register gather with a scalar or immediate index: latency 8 with the
// default per-LMUL SiFive7VA occupancy.
foreach mx = SchedMxList in {
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VACycles)] in {
    foreach wr = ["WriteVRGatherVX", "WriteVRGatherVI"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
890
// Vector-indexed gather and compress, per (LMUL, SEW). SiFive7VA occupancy
// is the one-per-element cycle count; latency is that count plus 3.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar ElemCycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = !add(ElemCycles, 3),
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, ElemCycles)] in {
      foreach wr = ["WriteVRGatherVV", "WriteVCompressV"] in
        defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VCQ, SiFive7VA], mx, sew, WorstCase>;
    }
  }
}
901
// Slide and slide1 writes (integer and FP): latency 4 with the default
// per-LMUL SiFive7VA occupancy.
foreach mx = SchedMxList in {
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VACycles)] in {
    foreach wr = ["WriteVISlideX", "WriteVISlideI",
                  "WriteVISlide1X", "WriteVFSlide1F"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
912
913// VMov*V is LMUL Aware
// Whole-register moves WriteVMov<N>V for N = 1, 2, 4, 8 registers.
// Latency is always 4; SiFive7VA is released at cycle 1 + 2 * N
// (i.e. 3, 5, 9 and 17 respectively).
foreach nregs = [1, 2, 4, 8] in {
  defvar VACycles = !mul(2, nregs);
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VACycles)] in
  def : WriteRes<!cast<SchedWrite>("WriteVMov" # nregs # "V"),
                 [SiFive7VCQ, SiFive7VA]>;
}
922
923// Others
// CSR accesses go down pipe B with the default WriteRes latency.
def : WriteRes<WriteCSR, [SiFive7PipeB]>;
// Nops consume no processor resource.
def : WriteRes<WriteNop, []>;
// Reading VLENB also uses pipe B, with a latency of 3.
def : WriteRes<WriteRdVLENB, [SiFive7PipeB]> {
  let Latency = 3;
}

// Schedule COPY like a plain integer ALU operation.
def : InstRW<[WriteIALU], (instrs COPY)>;
930
931//===----------------------------------------------------------------------===//
932
933// Bypass and advance
// Scalar integer, atomic and floating-point reads. Reads listed under
// SiFive7AnyToGPRBypass use that class with its default cycle count; all
// other reads get a ReadAdvance of 0. The groups below keep the original
// declaration order.
foreach rd = [ReadJmp, ReadJalr] in
  def : SiFive7AnyToGPRBypass<rd>;

foreach rd = [ReadCSR, ReadStoreData, ReadMemBase] in
  def : ReadAdvance<rd, 0>;

foreach rd = [ReadIALU, ReadIALU32,
              ReadShiftImm, ReadShiftImm32,
              ReadShiftReg, ReadShiftReg32] in
  def : SiFive7AnyToGPRBypass<rd>;

foreach rd = [
    // Integer divide / multiply.
    ReadIDiv, ReadIDiv32, ReadIMul, ReadIMul32,
    // Atomics.
    ReadAtomicWA, ReadAtomicWD, ReadAtomicDA, ReadAtomicDD,
    ReadAtomicLDW, ReadAtomicLDD, ReadAtomicSTW, ReadAtomicSTD,
    // FP memory operands.
    ReadFStoreData, ReadFMemBase,
    // FP add, multiply and fused multiply-add.
    ReadFAdd16, ReadFAdd32, ReadFAdd64,
    ReadFMul16, ReadFMA16, ReadFMA16Addend,
    ReadFMul32, ReadFMul64,
    ReadFMA32, ReadFMA32Addend,
    ReadFMA64, ReadFMA64Addend,
    // FP divide and square root.
    ReadFDiv16, ReadFDiv32, ReadFDiv64,
    ReadFSqrt16, ReadFSqrt32, ReadFSqrt64,
    // FP compare, sign injection, min/max.
    ReadFCmp16, ReadFCmp32, ReadFCmp64,
    ReadFSGNJ16, ReadFSGNJ32, ReadFSGNJ64,
    ReadFMinMax16, ReadFMinMax32, ReadFMinMax64,
    // FP -> integer conversions.
    ReadFCvtF16ToI32, ReadFCvtF16ToI64,
    ReadFCvtF32ToI32, ReadFCvtF32ToI64,
    ReadFCvtF64ToI32, ReadFCvtF64ToI64,
    // Integer -> FP conversions.
    ReadFCvtI32ToF16, ReadFCvtI32ToF32, ReadFCvtI32ToF64,
    ReadFCvtI64ToF16, ReadFCvtI64ToF32, ReadFCvtI64ToF64,
    // FP -> FP conversions.
    ReadFCvtF32ToF64, ReadFCvtF64ToF32,
    ReadFCvtF16ToF32, ReadFCvtF32ToF16,
    ReadFCvtF16ToF64, ReadFCvtF64ToF16,
    // FP <-> integer register moves.
    ReadFMovF16ToI16, ReadFMovI16ToF16,
    ReadFMovF32ToI32, ReadFMovI32ToF32,
    ReadFMovF64ToI64, ReadFMovI64ToF64,
    // FP classify.
    ReadFClass16, ReadFClass32, ReadFClass64
  ] in
  def : ReadAdvance<rd, 0>;
1013
// Short-forward-branch reads use the bypass class with an explicit cycle
// count of 0 instead of its default.
foreach rd = [ReadSFBJmp, ReadSFBALU] in
  def : SiFive7AnyToGPRBypass<rd, 0>;
1016
1017// Bitmanip
// Rotate and count-leading/trailing-zero reads take the GPR bypass.
foreach rd = [ReadRotateImm, ReadRotateImm32,
              ReadRotateReg, ReadRotateReg32,
              ReadCLZ, ReadCLZ32,
              ReadCTZ, ReadCTZ32] in
  def : SiFive7AnyToGPRBypass<rd>;
// Population count is the exception: plain ReadAdvance of 0.
foreach rd = [ReadCPOP, ReadCPOP32] in
  def : ReadAdvance<rd, 0>;
foreach rd = [ReadORCB, ReadREV8, ReadSHXADD, ReadSHXADD32] in
  def : SiFive7AnyToGPRBypass<rd>;
// Single-bit instructions
foreach rd = [ReadSingleBit, ReadSingleBitImm] in
  def : SiFive7AnyToGPRBypass<rd>;
1035
1036// 6. Configuration-Setting Instructions
// Both vsetvl-family reads are given a ReadAdvance of 2 cycles.
foreach rd = [ReadVSETVLI, ReadVSETVL] in
  def : ReadAdvance<rd, 2>;
1039
1040// 7. Vector Loads and Stores
// Every vector load/store read gets a ReadAdvance of 0. Reads that exist
// once are listed as records; LMUL-expanded reads are listed by name and
// expanded through LMULReadAdvance.

// Unit-stride.
foreach rd = [ReadVLDX, ReadVSTX] in
  def : ReadAdvance<rd, 0>;
foreach rd = ["ReadVSTEV", "ReadVSTM"] in
  defm "" : LMULReadAdvance<rd, 0>;

// Strided.
foreach rd = [ReadVLDSX, ReadVSTSX] in
  def : ReadAdvance<rd, 0>;
foreach rd = ["ReadVSTS8V", "ReadVSTS16V", "ReadVSTS32V", "ReadVSTS64V"] in
  defm "" : LMULReadAdvance<rd, 0>;

// Indexed (unordered, then ordered).
foreach rd = ["ReadVLDUXV", "ReadVLDOXV",
              "ReadVSTUX8", "ReadVSTUX16", "ReadVSTUX32", "ReadVSTUX64",
              "ReadVSTUXV",
              "ReadVSTUX8V", "ReadVSTUX16V", "ReadVSTUX32V", "ReadVSTUX64V",
              "ReadVSTOX8", "ReadVSTOX16", "ReadVSTOX32", "ReadVSTOX64",
              "ReadVSTOXV",
              "ReadVSTOX8V", "ReadVSTOX16V", "ReadVSTOX32V", "ReadVSTOX64V"] in
  defm "" : LMULReadAdvance<rd, 0>;

// LMUL Aware
foreach rd = [ReadVST1R, ReadVST2R, ReadVST4R, ReadVST8R] in
  def : ReadAdvance<rd, 0>;
1076
// 11. Vector Integer Arithmetic Instructions
// All vector integer arithmetic reads get a ReadAdvance of 0. The multiclass
// differs by operand kind: LMULReadAdvance for single-width reads,
// LMULReadAdvanceW for widening/narrowing reads and LMULSEWReadAdvance for
// the SEW-dependent divide reads. Original declaration order is preserved.
foreach rd = ["ReadVIALUV", "ReadVIALUX"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVIWALUV", "ReadVIWALUX"] in
  defm "" : LMULReadAdvanceW<rd, 0>;
foreach rd = ["ReadVExtV",
              "ReadVICALUV", "ReadVICALUX",
              "ReadVShiftV", "ReadVShiftX"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVNShiftV", "ReadVNShiftX"] in
  defm "" : LMULReadAdvanceW<rd, 0>;
foreach rd = ["ReadVICmpV", "ReadVICmpX",
              "ReadVIMinMaxV", "ReadVIMinMaxX",
              "ReadVIMulV", "ReadVIMulX"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVIDivV", "ReadVIDivX"] in
  defm "" : LMULSEWReadAdvance<rd, 0>;
foreach rd = ["ReadVIWMulV", "ReadVIWMulX"] in
  defm "" : LMULReadAdvanceW<rd, 0>;
foreach rd = ["ReadVIMulAddV", "ReadVIMulAddX"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVIWMulAddV", "ReadVIWMulAddX"] in
  defm "" : LMULReadAdvanceW<rd, 0>;
foreach rd = ["ReadVIMergeV", "ReadVIMergeX",
              "ReadVIMovV", "ReadVIMovX"] in
  defm "" : LMULReadAdvance<rd, 0>;
1107
// 12. Vector Fixed-Point Arithmetic Instructions
// Fixed-point reads all get a ReadAdvance of 0; the narrowing clip reads go
// through the widening/narrowing multiclass.
foreach rd = ["ReadVSALUV", "ReadVSALUX",
              "ReadVAALUV", "ReadVAALUX",
              "ReadVSMulV", "ReadVSMulX",
              "ReadVSShiftV", "ReadVSShiftX"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVNClipV", "ReadVNClipX"] in
  defm "" : LMULReadAdvanceW<rd, 0>;
1119
// 13. Vector Floating-Point Instructions
// All vector floating-point reads get a ReadAdvance of 0. The multiclass is
// chosen per read: LMULReadAdvance (single-width), LMULReadAdvanceFW (FP
// widening/narrowing), LMULReadAdvanceW (integer-side widening/narrowing)
// and LMULSEWReadAdvanceF (SEW-dependent divide/sqrt). Original declaration
// order is preserved.
foreach rd = ["ReadVFALUV", "ReadVFALUF"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVFWALUV", "ReadVFWALUF"] in
  defm "" : LMULReadAdvanceFW<rd, 0>;
foreach rd = ["ReadVFMulV", "ReadVFMulF"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVFDivV", "ReadVFDivF"] in
  defm "" : LMULSEWReadAdvanceF<rd, 0>;
foreach rd = ["ReadVFWMulV", "ReadVFWMulF"] in
  defm "" : LMULReadAdvanceFW<rd, 0>;
foreach rd = ["ReadVFMulAddV", "ReadVFMulAddF"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVFWMulAddV", "ReadVFWMulAddF"] in
  defm "" : LMULReadAdvanceFW<rd, 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
foreach rd = ["ReadVFRecpV",
              "ReadVFMinMaxV", "ReadVFMinMaxF",
              "ReadVFSgnjV", "ReadVFSgnjF",
              "ReadVFCmpV", "ReadVFCmpF",
              "ReadVFClassV",
              "ReadVFMergeV", "ReadVFMergeF",
              "ReadVFMovF",
              "ReadVFCvtIToFV", "ReadVFCvtFToIV"] in
  defm "" : LMULReadAdvance<rd, 0>;
// Conversions: the integer-source widening and integer-result narrowing
// reads use the W multiclass, the remaining ones use FW.
defm "" : LMULReadAdvanceW<"ReadVFWCvtIToFV", 0>;
foreach rd = ["ReadVFWCvtFToIV", "ReadVFWCvtFToFV", "ReadVFNCvtIToFV"] in
  defm "" : LMULReadAdvanceFW<rd, 0>;
defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
1155
// 14. Vector Reduction Operations
// Reduction source reads and their "V0" counterparts: ReadAdvance of 0 for
// the integer, widening integer, FP (unordered/ordered) and widening FP
// (unordered/ordered) forms.
foreach rd = [ReadVIRedV, ReadVIRedV0,
              ReadVIWRedV, ReadVIWRedV0,
              ReadVFRedV, ReadVFRedV0,
              ReadVFRedOV, ReadVFRedOV0,
              ReadVFWRedV, ReadVFWRedV0,
              ReadVFWRedOV, ReadVFWRedOV0] in
  def : ReadAdvance<rd, 0>;
1169
// 15. Vector Mask Instructions
// Mask-instruction reads, expanded per LMUL with a ReadAdvance of 0.
foreach rd = ["ReadVMALUV", "ReadVMPopV", "ReadVMFFSV",
              "ReadVMSFSV", "ReadVMIotV"] in
  defm "" : LMULReadAdvance<rd, 0>;
1176
// 16. Vector Permutation Instructions
// Permutation reads, all with a ReadAdvance of 0.

// Scalar <-> vector element moves (not LMUL expanded).
foreach rd = [ReadVIMovVX, ReadVIMovXV, ReadVIMovXX,
              ReadVFMovVF, ReadVFMovFV, ReadVFMovFX] in
  def : ReadAdvance<rd, 0>;

// Slides, expanded per LMUL.
foreach rd = ["ReadVISlideV", "ReadVISlideX",
              "ReadVFSlideV", "ReadVFSlideF"] in
  defm "" : LMULReadAdvance<rd, 0>;

// Gathers: the vector-indexed form is expanded per (LMUL, SEW), the scalar
// and immediate forms per LMUL only.
foreach rd = ["ReadVRGatherVV_data", "ReadVRGatherVV_index"] in
  defm "" : LMULSEWReadAdvance<rd, 0>;
foreach rd = ["ReadVRGatherVX_data", "ReadVRGatherVX_index",
              "ReadVRGatherVI_data"] in
  defm "" : LMULReadAdvance<rd, 0>;

// Compress, expanded per (LMUL, SEW).
defm "" : LMULSEWReadAdvance<"ReadVCompressV", 0>;

// LMUL Aware
foreach rd = [ReadVMov1V, ReadVMov2V, ReadVMov4V, ReadVMov8V] in
  def : ReadAdvance<rd, 0>;
1199
1200// Others
// Mask operand and merge (passthru) operand reads: ReadAdvance of 0.
foreach rd = [ReadVMask, ReadVMergeOp_WorstCase] in
  def : ReadAdvance<rd, 0>;
// The merge operand read also exists once per LMUL ("ReadVMergeOp_<mx>") and
// once per (LMUL, SEW) pair ("ReadVMergeOp_<mx>_E<sew>"); look each up by name.
foreach mx = SchedMxList in {
  defvar PerLMULName = "ReadVMergeOp_" # mx;
  def : ReadAdvance<!cast<SchedRead>(PerLMULName), 0>;
  foreach sew = SchedSEWSet<mx>.val in {
    def : ReadAdvance<!cast<SchedRead>(PerLMULName # "_E" # sew), 0>;
  }
}
1208
1209//===----------------------------------------------------------------------===//
1210// Unsupported extensions
// Instantiate the Unsupported* multiclasses for the Zbc, Zbkb, Zbkx and Zfa
// extensions so this model still provides definitions for their scheduling
// classes.
// NOTE(review): the multiclass bodies are defined outside this file chunk;
// presumably they mark the corresponding writes/reads as unsupported -- confirm.
defm : UnsupportedSchedZbc;
defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedZfa;
1215}
1216