1//==- RISCVSchedSiFive7.td - SiFive7 Scheduling Definitions --*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9//===----------------------------------------------------------------------===//
10
/// c is true if mx is the worst-case LMUL among the LMULs in MxList.
/// On the SiFive7, the worst case LMUL is the largest LMUL. SEW is not
/// taken into account by this class.
class SiFive7IsWorstCaseMX<string mx, list<string> MxList> {
  bit c = !eq(mx, LargestLMUL<MxList>.r);
}
18
/// c is true if the (mx, sew) pair has the worst case behavior compared to
/// the LMULs in MxList. On the SiFive7, the worst case LMUL is the largest
/// LMUL and the worst case SEW is the smallest SEW for that LMUL. isF is
/// forwarded to SmallestSEW when looking up the smallest SEW.
class SiFive7IsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
                               bit isF = 0> {
  defvar IsLargestLMUL = !eq(mx, LargestLMUL<MxList>.r);
  defvar IsSmallestSEW = !eq(sew, SmallestSEW<mx, isF>.r);
  bit c = !and(IsLargestLMUL, IsSmallestSEW);
}
28
/// Number of DLEN parts = (LMUL * VLEN) / DLEN.
/// Since DLEN = VLEN / 2, Num DLEN parts = 2 * LMUL.
/// Every fractional LMUL is given a single cycle.
class SiFive7GetCyclesDefault<string mx> {
  int c = !cond(
    // Fractional LMULs.
    !eq(mx, "MF8") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF2") : 1,
    // Integral LMULs: 2 * LMUL.
    !eq(mx, "M1") : 2,
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 16
  );
}
42
/// Cycle count used for narrowing operations. For MF2 through M4 the value
/// is 4 * LMUL; MF4 and MF8 get a single cycle. There is deliberately no M8
/// entry here, so this class must not be instantiated with mx = "M8".
class SiFive7GetCyclesNarrowing<string mx> {
  int c = !cond(
    !eq(mx, "MF8") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF2") : 2,
    !eq(mx, "M1") : 4,
    !eq(mx, "M2") : 8,
    !eq(mx, "M4") : 16
  );
}
53
/// Cycle count for mask operations: 2 cycles at M8, 1 cycle for every other
/// LMUL.
class SiFive7GetCyclesVMask<string mx> {
  int c = !cond(
    !eq(mx, "MF8") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF2") : 1,
    !eq(mx, "M1") : 1,
    !eq(mx, "M2") : 1,
    !eq(mx, "M4") : 1,
    !eq(mx, "M8") : 2
  );
}
65
/// VLDM and VSTM can't read/write more than 2 DLENs of data.
/// 2 DLENs when LMUL=8. 1 DLEN for all other LMULs.
class SiFive7GetMaskLoadStoreCycles<string mx> {
  int c = !if(!eq(mx, "M8"), 2, 1);
}
74
// Cycles for nf=2 segmented loads and stores are calculated using the
// formula (2 * VLEN * LMUL) / DLEN = 4 * LMUL. MF4 and MF8 are given a
// single cycle.
class SiFive7GetCyclesSegmentedSeg2<string mx> {
  int c = !cond(
    !eq(mx, "MF8") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF2") : 2,
    !eq(mx, "M1") : 4,
    !eq(mx, "M2") : 8,
    !eq(mx, "M4") : 16,
    !eq(mx, "M8") : 32
  );
}
88
// Cycles for segmented loads and stores are calculated using the
// formula vl * ceil((SEW * nf) / DLEN), where SEW * nf is the segment size.
// vl is taken to be its upper bound (VLEN * LMUL) / SEW, with VLEN fixed at
// 512 and DLEN at 256.
class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
  defvar VLEN = 512;
  defvar DLEN = 256;
  // Bits in the register group: VLEN * LMUL.
  defvar GroupBits = !cond(
    !eq(mx, "MF8") : !div(VLEN, 8),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "M1") : VLEN,
    !eq(mx, "M2") : !mul(VLEN, 2),
    !eq(mx, "M4") : !mul(VLEN, 4),
    !eq(mx, "M8") : !mul(VLEN, 8)
  );
  // (VLEN * LMUL) / SEW
  defvar VLUpperBound = !div(GroupBits, sew);
  // ceil(SegmentBits / DLEN), computed as (a + b - 1) / b.
  defvar SegmentBits = !mul(sew, nf);
  defvar BeatsPerSegment = !div(!sub(!add(SegmentBits, DLEN), 1), DLEN);
  int c = !mul(VLUpperBound, BeatsPerSegment);
}
109
/// Cycle count for operations that process one element per cycle:
/// c = (VLEN / SEW) * LMUL.
class SiFive7GetCyclesOnePerElement<string mx, int sew> {
  // FIXME: On SiFive7, VLEN is 512. Although a user can request the compiler
  // to use a different VLEN, this model will not make scheduling decisions
  // based on the user specified VLEN.
  // Note: c >= 1 since the smallest value of VLEN / SEW is 512 / 64 = 8, and
  // the largest division applied to it is the division by 8 in the MF8 case.
  // Therefore, there is no need to ceil the result.
  // Despite its name, this field holds the element count VLEN / SEW.
  int VLEN = !div(512, sew);
  int c = !cond(
    !eq(mx, "MF8") : !div(VLEN, 8),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "M1")  : VLEN,
    !eq(mx, "M2")  : !mul(VLEN, 2),
    !eq(mx, "M4")  : !mul(VLEN, 4),
    !eq(mx, "M8")  : !mul(VLEN, 8)
  );
}
129
/// Per-SEW multiplier applied to the cycle count of vector divide and
/// square-root writes.
class SiFive7GetDivOrSqrtFactor<int sew> {
  int c = !cond(
    !eq(sew, 64) : 57,
    !eq(sew, 32) : 28,
    !eq(sew, 16) : 15,
    // TODO: Add SchedSEWSetFP upstream and remove the SEW=8 case.
    !eq(sew, 8) : 15
  );
}
139
/// Cycles for reductions take approximately VL*SEW/DLEN + 5(4 + log(DLEN/SEW))
/// cycles.
class SiFive7GetReductionCycles<string mx, int sew> {
  // VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
  // VLUpperBound=(VLEN*LMUL)/SEW and DLEN=VLEN/2. Fractional LMULs are
  // given a value of 1.
  defvar VLEN = 512;
  defvar DLEN = !div(VLEN, 2);
  defvar TwoTimesLMUL = !cond(
    !eq(mx, "M1") : 2,
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 16,
    !eq(mx, "MF2") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
  // TwoTimesLMUL already is the VL*SEW/DLEN term of the formula, so it is
  // added as-is. Dividing it by DLEN again would always truncate to 0 and
  // drop the LMUL-dependent part of the cost.
  int c = !add(
    TwoTimesLMUL,
    !mul(5, !add(4, !logtwo(!div(DLEN, sew))))
  );
}
161
/// Cycles for ordered reductions take approximately 5*VL cycles, where VL is
/// taken to be its upper bound (VLEN * LMUL) / SEW with VLEN fixed at 512.
class SiFive7GetOrderedReductionCycles<string mx, int sew> {
  defvar VLEN = 512;
  // Bits in the register group: VLEN * LMUL.
  defvar GroupBits = !cond(
    !eq(mx, "MF8") : !div(VLEN, 8),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "M1") : VLEN,
    !eq(mx, "M2") : !mul(VLEN, 2),
    !eq(mx, "M4") : !mul(VLEN, 4),
    !eq(mx, "M8") : !mul(VLEN, 8)
  );
  // (VLEN * LMUL) / SEW
  defvar VLUpperBound = !div(GroupBits, sew);
  int c = !mul(5, VLUpperBound);
}
177
/// ReadAdvance of `cycles` (2 by default) for `read`, applied to results
/// produced by any of the writes listed below.
class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
    : ReadAdvance<read, cycles, [
        // ALU and shifts
        WriteIALU, WriteIALU32,
        WriteShiftImm, WriteShiftImm32,
        WriteShiftReg, WriteShiftReg32,
        // Bitmanip
        WriteSHXADD, WriteSHXADD32,
        WriteRotateImm, WriteRotateImm32,
        WriteRotateReg, WriteRotateReg32,
        WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
        WriteCPOP, WriteCPOP32,
        WriteREV8, WriteORCB,
        // Short forward branch
        WriteSFB,
        // Multiply and divide
        WriteIMul, WriteIMul32,
        WriteIDiv, WriteIDiv32,
        // Integer loads
        WriteLDB, WriteLDH, WriteLDW, WriteLDD
      ]>;
191
// SiFive7 machine model for scheduling and other instruction cost heuristics.
def SiFive7Model : SchedMachineModel {
  // Explicitly set to zero since SiFive7 is in-order.
  let MicroOpBufferSize = 0;
  // 2 micro-ops are dispatched per cycle.
  let IssueWidth = 2;
  let LoadLatency = 3;
  let MispredictPenalty = 3;
  let CompleteModel = 0;
  let UnsupportedFeatures = [
    HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx, HasStdExtZcmt,
    HasStdExtZknd, HasStdExtZkne, HasStdExtZknh, HasStdExtZksed,
    HasStdExtZksh, HasStdExtZkr
  ];
}
204
205// The SiFive7 microarchitecture has three pipelines: A, B, V.
206// Pipe A can handle memory, integer alu and vector operations.
207// Pipe B can handle integer alu, control flow, integer multiply and divide,
208// and floating point computation.
209// Pipe V can handle the V extension.
210let SchedModel = SiFive7Model in {
// The three pipelines. Each one is declared with BufferSize = 0.
def SiFive7PipeA : ProcResource<1> { let BufferSize = 0; }
def SiFive7PipeB : ProcResource<1> { let BufferSize = 0; }
def SiFive7PipeV : ProcResource<1> { let BufferSize = 0; }
216
// Sub-resources of pipe B.
let BufferSize = 1, Super = SiFive7PipeB in {
def SiFive7IDiv : ProcResource<1>; // Int Division
def SiFive7FDiv : ProcResource<1>; // FP Division/Sqrt
}

// Sub-resources of pipe V.
let BufferSize = 1, Super = SiFive7PipeV in {
def SiFive7VA : ProcResource<1>; // Arithmetic sequencer
def SiFive7VL : ProcResource<1>; // Load sequencer
def SiFive7VS : ProcResource<1>; // Store sequencer
}
224
// Resource group made of pipe A and pipe B; used by the writes below that
// list SiFive7PipeAB as their resource.
def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;
226
// Branching
foreach BranchWrite = [WriteJmp, WriteJal, WriteJalr] in
def : WriteRes<BranchWrite, [SiFive7PipeB]> { let Latency = 3; }
233
// Short forward branch: two micro-ops, using both pipe A and pipe B.
let Latency = 3, NumMicroOps = 2 in
def : WriteRes<WriteSFB, [SiFive7PipeA, SiFive7PipeB]>;
239
// Integer arithmetic and logic
foreach IntWrite = [WriteIALU, WriteIALU32,
                    WriteShiftImm, WriteShiftImm32,
                    WriteShiftReg, WriteShiftReg32] in
def : WriteRes<IntWrite, [SiFive7PipeAB]> { let Latency = 3; }
249
// Integer multiplication
foreach MulWrite = [WriteIMul, WriteIMul32] in
def : WriteRes<MulWrite, [SiFive7PipeB]> { let Latency = 3; }
255
// Integer division
// One cycle on pipe B; the divider is held for the remaining cycles.
let Latency = 66, ResourceCycles = [1, 65] in
def : WriteRes<WriteIDiv, [SiFive7PipeB, SiFive7IDiv]>;
let Latency = 34, ResourceCycles = [1, 33] in
def : WriteRes<WriteIDiv32, [SiFive7PipeB, SiFive7IDiv]>;
265
// Bitmanip
let Latency = 3 in {
// Pipe B only:
//  - Rotates are in the late-B ALU.
//  - clz[w]/ctz[w] are in the late-B ALU.
//  - cpop[w] look exactly like multiply.
//  - orc.b is in the late-B ALU.
//  - shNadd[.uw] is on the early-B and late-B ALUs.
foreach PipeBWrite = [WriteRotateImm, WriteRotateImm32,
                      WriteRotateReg, WriteRotateReg32,
                      WriteCLZ, WriteCLZ32,
                      WriteCTZ, WriteCTZ32,
                      WriteCPOP, WriteCPOP32,
                      WriteORCB,
                      WriteSHXADD, WriteSHXADD32] in
def : WriteRes<PipeBWrite, [SiFive7PipeB]>;

// rev8 is in the late-A and late-B ALUs.
def : WriteRes<WriteREV8, [SiFive7PipeAB]>;
}
294
// Memory
// Integer and FP stores; no explicit Latency is set for these.
foreach StoreWrite = [WriteSTB, WriteSTH, WriteSTW, WriteSTD,
                      WriteFST16, WriteFST32, WriteFST64] in
def : WriteRes<StoreWrite, [SiFive7PipeA]>;

// Integer loads
foreach LoadWrite = [WriteLDB, WriteLDH, WriteLDW, WriteLDD] in
def : WriteRes<LoadWrite, [SiFive7PipeA]> { let Latency = 3; }

// FP loads
foreach FPLoadWrite = [WriteFLD16, WriteFLD32, WriteFLD64] in
def : WriteRes<FPLoadWrite, [SiFive7PipeA]> { let Latency = 2; }
316
// Atomic memory
// Atomic stores; no explicit Latency is set for these.
foreach AtomicStWrite = [WriteAtomicSTW, WriteAtomicSTD] in
def : WriteRes<AtomicStWrite, [SiFive7PipeA]>;

foreach AtomicWrite = [WriteAtomicW, WriteAtomicD,
                       WriteAtomicLDW, WriteAtomicLDD] in
def : WriteRes<AtomicWrite, [SiFive7PipeA]> { let Latency = 3; }
327
// Half precision.
foreach ArithWrite = [WriteFAdd16, WriteFMul16, WriteFMA16] in
def : WriteRes<ArithWrite, [SiFive7PipeB]> { let Latency = 5; }

foreach SimpleWrite = [WriteFSGNJ16, WriteFMinMax16] in
def : WriteRes<SimpleWrite, [SiFive7PipeB]> { let Latency = 3; }

// One cycle on pipe B; the FP divider is held for the remaining cycles.
foreach DivSqrtWrite = [WriteFDiv16, WriteFSqrt16] in
def : WriteRes<DivSqrtWrite, [SiFive7PipeB, SiFive7FDiv]> {
  let Latency = 14;
  let ResourceCycles = [1, 13];
}
343
// Single precision.
foreach ArithWrite = [WriteFAdd32, WriteFMul32, WriteFMA32] in
def : WriteRes<ArithWrite, [SiFive7PipeB]> { let Latency = 5; }

foreach SimpleWrite = [WriteFSGNJ32, WriteFMinMax32] in
def : WriteRes<SimpleWrite, [SiFive7PipeB]> { let Latency = 3; }

// One cycle on pipe B; the FP divider is held for the remaining cycles.
foreach DivSqrtWrite = [WriteFDiv32, WriteFSqrt32] in
def : WriteRes<DivSqrtWrite, [SiFive7PipeB, SiFive7FDiv]> {
  let Latency = 27;
  let ResourceCycles = [1, 26];
}
359
// Double precision
foreach ArithWrite = [WriteFAdd64, WriteFMul64, WriteFMA64] in
def : WriteRes<ArithWrite, [SiFive7PipeB]> { let Latency = 7; }

foreach SimpleWrite = [WriteFSGNJ64, WriteFMinMax64] in
def : WriteRes<SimpleWrite, [SiFive7PipeB]> { let Latency = 3; }

// One cycle on pipe B; the FP divider is held for the remaining cycles.
foreach DivSqrtWrite = [WriteFDiv64, WriteFSqrt64] in
def : WriteRes<DivSqrtWrite, [SiFive7PipeB, SiFive7FDiv]> {
  let Latency = 56;
  let ResourceCycles = [1, 55];
}
375
// Conversions
foreach CvtWrite = [WriteFCvtI32ToF16, WriteFCvtI32ToF32, WriteFCvtI32ToF64,
                    WriteFCvtI64ToF16, WriteFCvtI64ToF32, WriteFCvtI64ToF64,
                    WriteFCvtF16ToI32, WriteFCvtF16ToI64,
                    WriteFCvtF16ToF32, WriteFCvtF16ToF64,
                    WriteFCvtF32ToI32, WriteFCvtF32ToI64,
                    WriteFCvtF32ToF16, WriteFCvtF32ToF64,
                    WriteFCvtF64ToI32, WriteFCvtF64ToI64,
                    WriteFCvtF64ToF16, WriteFCvtF64ToF32] in
def : WriteRes<CvtWrite, [SiFive7PipeB]> { let Latency = 3; }

// FClass, FCmp and FMov writes share the same latency and pipe.
foreach MiscWrite = [WriteFClass16, WriteFClass32, WriteFClass64,
                     WriteFCmp16, WriteFCmp32, WriteFCmp64,
                     WriteFMovI16ToF16, WriteFMovF16ToI16,
                     WriteFMovI32ToF32, WriteFMovF32ToI32,
                     WriteFMovI64ToF64, WriteFMovF64ToI64] in
def : WriteRes<MiscWrite, [SiFive7PipeB]> { let Latency = 3; }
410
// 6. Configuration-Setting Instructions
foreach VSetWrite = [WriteVSETVLI, WriteVSETIVLI, WriteVSETVL] in
def : WriteRes<VSetWrite, [SiFive7PipeA]> { let Latency = 3; }
417
// 7. Vector Loads and Stores
// Unit-stride loads and stores can operate at the full bandwidth of the memory
// pipe. The memory pipe is DLEN bits wide on x280.
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  // Loads (including fault-only-first) on the load sequencer.
  let Latency = 4, ResourceCycles = [Occupancy] in
  foreach LoadName = ["WriteVLDE", "WriteVLDFF"] in
  defm "" : LMULWriteResMX<LoadName, [SiFive7VL], mx, WorstCase>;
  // Stores on the store sequencer.
  let Latency = 1, ResourceCycles = [Occupancy] in
  defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VS], mx, WorstCase>;
}
431
// Mask loads and stores; cycles come from SiFive7GetMaskLoadStoreCycles.
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetMaskLoadStoreCycles<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let ResourceCycles = [Occupancy] in {
    let Latency = 4 in
    defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VL], mx, WorstCase>;
    let Latency = 1 in
    defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VS], mx, WorstCase>;
  }
}
440
// Strided loads and stores operate at one element per cycle and should be
// scheduled accordingly. Indexed loads and stores operate at one element per
// cycle, and they stall the machine until all addresses have been generated,
// so they cannot be scheduled. Indexed and strided loads and stores have LMUL
// specific suffixes, but since SEW is already encoded in the name of the
// resource, we do not need to use LMULSEWXXX constructors. However, we do
// use the SEW from the name to determine the number of Cycles.
foreach sew = [8, 16, 32, 64] in {
  foreach mx = SchedMxList in {
    defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
    // Strided (S), indexed-unordered (UX) and indexed-ordered (OX) loads.
    let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in
    foreach LoadKind = ["WriteVLDS", "WriteVLDUX", "WriteVLDOX"] in
    defm "" : LMULWriteResMX<LoadKind # sew, [SiFive7VL], mx, IsWorstCase>;
    // Matching stores.
    let Latency = 1, ResourceCycles = [Cycles] in
    foreach StoreKind = ["WriteVSTS", "WriteVSTUX", "WriteVSTOX"] in
    defm "" : LMULWriteResMX<StoreKind # sew, [SiFive7VS], mx, IsWorstCase>;
  }
}
504
// VLD*R is LMUL aware: the load sequencer is held for 2 cycles per register.
def : WriteRes<WriteVLD1R, [SiFive7VL]> {
  let Latency = 4;
  let ResourceCycles = [2];
}
def : WriteRes<WriteVLD2R, [SiFive7VL]> {
  let Latency = 4;
  let ResourceCycles = [4];
}
def : WriteRes<WriteVLD4R, [SiFive7VL]> {
  let Latency = 4;
  let ResourceCycles = [8];
}
def : WriteRes<WriteVLD8R, [SiFive7VL]> {
  let Latency = 4;
  let ResourceCycles = [16];
}
// VST*R is LMUL aware: the store sequencer is held for 2 cycles per register.
def : WriteRes<WriteVST1R, [SiFive7VS]> {
  let Latency = 1;
  let ResourceCycles = [2];
}
def : WriteRes<WriteVST2R, [SiFive7VS]> {
  let Latency = 1;
  let ResourceCycles = [4];
}
def : WriteRes<WriteVST4R, [SiFive7VS]> {
  let Latency = 1;
  let ResourceCycles = [8];
}
def : WriteRes<WriteVST8R, [SiFive7VS]> {
  let Latency = 1;
  let ResourceCycles = [16];
}
523
// Segmented Loads and Stores
// Unit-stride segmented loads and stores are effectively converted into strided
// segment loads and stores. Strided segment loads and stores operate at up to
// one segment per cycle if the segment fits within one aligned memory beat.
// Indexed segment loads and stores operate at the same rate as strided ones,
// but they stall the machine until all addresses have been generated.
foreach mx = SchedMxList in {
  foreach eew = [8, 16, 32, 64] in {
    foreach nf=2-8 in {
      // nf=2 uses the dedicated Seg2 table; larger nf use the generic formula.
      defvar Cycles = !if(!eq(nf, 2),
                          SiFive7GetCyclesSegmentedSeg2<mx>.c,
                          SiFive7GetCyclesSegmented<mx, eew, nf>.c);
      defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
      defvar Suffix = nf # "e" # eew;
      // Does not chain so set latency high
      let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in
      foreach LoadKind = ["WriteVLSEG", "WriteVLSEGFF"] in
      defm "" : LMULWriteResMX<LoadKind # Suffix, [SiFive7VL], mx, IsWorstCase>;
      let Latency = 1, ResourceCycles = [Cycles] in
      defm "" : LMULWriteResMX<"WriteVSSEG" # Suffix, [SiFive7VS], mx, IsWorstCase>;
    }
  }
}
// Strided and indexed segment loads and stores, for every nf from 2 to 8.
foreach mx = SchedMxList in {
  foreach eew = [8, 16, 32, 64] in {
    foreach nf=2-8 in {
      defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
      defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
      defvar Suffix = nf # "e" # eew;
      // Does not chain so set latency high
      let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in
      foreach LoadKind = ["WriteVLSSEG", "WriteVLUXSEG", "WriteVLOXSEG"] in
      defm "" : LMULWriteResMX<LoadKind # Suffix, [SiFive7VL], mx, IsWorstCase>;
      let Latency = 1, ResourceCycles = [Cycles] in
      foreach StoreKind = ["WriteVSSSEG", "WriteVSUXSEG", "WriteVSOXSEG"] in
      defm "" : LMULWriteResMX<StoreKind # Suffix, [SiFive7VS], mx, IsWorstCase>;
    }
  }
}
573
// 11. Vector Integer Arithmetic Instructions
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, ResourceCycles = [Occupancy] in
  foreach WriteName = ["WriteVIALUV",    "WriteVIALUX",    "WriteVIALUI",
                       "WriteVICALUV",   "WriteVICALUX",   "WriteVICALUI",
                       "WriteVShiftV",   "WriteVShiftX",   "WriteVShiftI",
                       "WriteVIMinMaxV", "WriteVIMinMaxX",
                       "WriteVIMulV",    "WriteVIMulX",
                       "WriteVIMulAddV", "WriteVIMulAddX",
                       "WriteVIMergeV",  "WriteVIMergeX",  "WriteVIMergeI",
                       "WriteVIMovV",    "WriteVIMovX",    "WriteVIMovI"] in
  defm "" : LMULWriteResMX<WriteName, [SiFive7VA], mx, WorstCase>;
  // Mask results can't chain.
  let Latency = !add(Occupancy, 3), ResourceCycles = [Occupancy] in
  foreach CmpName = ["WriteVICmpV", "WriteVICmpX", "WriteVICmpI"] in
  defm "" : LMULWriteResMX<CmpName, [SiFive7VA], mx, WorstCase>;
}
// Vector integer extension.
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, ResourceCycles = [Occupancy] in
  defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VA], mx, WorstCase>;
}
// Vector integer divide: the per-SEW factor times a quarter of the
// one-element-per-cycle count, used for both latency and occupancy.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar QuarterElements = !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4);
    defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c, QuarterElements);
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = Cycles, ResourceCycles = [Cycles] in
    foreach DivName = ["WriteVIDivV", "WriteVIDivX"] in
    defm "" : LMULSEWWriteResMXSEW<DivName, [SiFive7VA], mx, sew, IsWorstCase>;
  }
}
626
// Widening
foreach mx = SchedMxListW in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, ResourceCycles = [Occupancy] in
  foreach WriteName = ["WriteVIWALUV",    "WriteVIWALUX", "WriteVIWALUI",
                       "WriteVIWMulV",    "WriteVIWMulX",
                       "WriteVIWMulAddV", "WriteVIWMulAddX"] in
  defm "" : LMULWriteResMX<WriteName, [SiFive7VA], mx, WorstCase>;
}
// Narrowing
foreach mx = SchedMxListW in {
  defvar Occupancy = SiFive7GetCyclesNarrowing<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, ResourceCycles = [Occupancy] in
  foreach WriteName = ["WriteVNShiftV", "WriteVNShiftX", "WriteVNShiftI"] in
  defm "" : LMULWriteResMX<WriteName, [SiFive7VA], mx, WorstCase>;
}
651
// 12. Vector Fixed-Point Arithmetic Instructions
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8, ResourceCycles = [Occupancy] in
  foreach WriteName = ["WriteVSALUV",   "WriteVSALUX",   "WriteVSALUI",
                       "WriteVAALUV",   "WriteVAALUX",
                       "WriteVSMulV",   "WriteVSMulX",
                       "WriteVSShiftV", "WriteVSShiftX", "WriteVSShiftI"] in
  defm "" : LMULWriteResMX<WriteName, [SiFive7VA], mx, WorstCase>;
}
// Narrowing
foreach mx = SchedMxListW in {
  defvar Occupancy = SiFive7GetCyclesNarrowing<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, ResourceCycles = [Occupancy] in
  foreach WriteName = ["WriteVNClipV", "WriteVNClipX", "WriteVNClipI"] in
  defm "" : LMULWriteResMX<WriteName, [SiFive7VA], mx, WorstCase>;
}
679
// 13. Vector Floating-Point Instructions
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  // Latency 8: ALU, multiply, multiply-add, reciprocal and conversions.
  let Latency = 8, ResourceCycles = [Occupancy] in
  foreach WriteName = ["WriteVFALUV",    "WriteVFALUF",
                       "WriteVFMulV",    "WriteVFMulF",
                       "WriteVFMulAddV", "WriteVFMulAddF",
                       "WriteVFRecpV",
                       "WriteVFCvtIToFV", "WriteVFCvtFToIV"] in
  defm "" : LMULWriteResMX<WriteName, [SiFive7VA], mx, WorstCase>;
  // Latency 4: sign-injection, min/max, classify, merge and move.
  let Latency = 4, ResourceCycles = [Occupancy] in
  foreach WriteName = ["WriteVFSgnjV",   "WriteVFSgnjF",
                       "WriteVFMinMaxV", "WriteVFMinMaxF",
                       "WriteVFClassV",
                       "WriteVFMergeV",
                       "WriteVFMovV"] in
  defm "" : LMULWriteResMX<WriteName, [SiFive7VA], mx, WorstCase>;
  // Mask results can't chain.
  let Latency = !add(Occupancy, 3), ResourceCycles = [Occupancy] in
  foreach CmpName = ["WriteVFCmpV", "WriteVFCmpF"] in
  defm "" : LMULWriteResMX<CmpName, [SiFive7VA], mx, WorstCase>;
}
// Vector FP divide and square root: the per-SEW factor times a quarter of
// the one-element-per-cycle count, used for both latency and occupancy.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar QuarterElements = !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4);
    defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c, QuarterElements);
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    let Latency = Cycles, ResourceCycles = [Cycles] in
    foreach DivSqrtName = ["WriteVFSqrtV", "WriteVFDivV", "WriteVFDivF"] in
    defm "" : LMULSEWWriteResMXSEW<DivSqrtName, [SiFive7VA], mx, sew, IsWorstCase>;
  }
}
722
723// Widening
// WriteVFWCvtIToFV iterates SchedMxListW; the other widening FP writes
// iterate SchedMxListFW.
foreach mx = SchedMxListW in {
  defvar IsWorst = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  // Fixed latency of 8; SiFive7VA occupancy is the default per-LMUL count.
  let ResourceCycles = [VACycles], Latency = 8 in
  defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VA], mx, IsWorst>;
}
// Widening FP writes defined over SchedMxListFW.
foreach mx = SchedMxListFW in {
  defvar IsWorst = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  // Fixed latency of 8; SiFive7VA occupancy is the default per-LMUL count.
  let ResourceCycles = [VACycles], Latency = 8 in {
    defm "" : LMULWriteResMX<"WriteVFWALUV",     [SiFive7VA], mx, IsWorst>;
    defm "" : LMULWriteResMX<"WriteVFWMulV",     [SiFive7VA], mx, IsWorst>;
    defm "" : LMULWriteResMX<"WriteVFWMulAddV",  [SiFive7VA], mx, IsWorst>;
    defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VA], mx, IsWorst>;
    defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFive7VA], mx, IsWorst>;
    defm "" : LMULWriteResMX<"WriteVFWMulAddF",  [SiFive7VA], mx, IsWorst>;
    defm "" : LMULWriteResMX<"WriteVFWMulF",     [SiFive7VA], mx, IsWorst>;
    defm "" : LMULWriteResMX<"WriteVFWALUF",     [SiFive7VA], mx, IsWorst>;
  }
}
745// Narrowing
// WriteVFNCvtFToIV iterates SchedMxListW; the other narrowing FP converts
// iterate SchedMxListFW.
foreach mx = SchedMxListW in {
  defvar IsWorst = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  defvar NarrowCycles = SiFive7GetCyclesNarrowing<mx>.c;
  // Fixed latency of 8; occupancy comes from the narrowing cycle table.
  let ResourceCycles = [NarrowCycles], Latency = 8 in
  defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VA], mx, IsWorst>;
}
// Narrowing FP converts defined over SchedMxListFW.
foreach mx = SchedMxListFW in {
  defvar IsWorst = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
  defvar NarrowCycles = SiFive7GetCyclesNarrowing<mx>.c;
  // Fixed latency of 8; occupancy comes from the narrowing cycle table.
  let ResourceCycles = [NarrowCycles], Latency = 8 in {
    defm "" : LMULWriteResMX<"WriteVFNCvtIToFV", [SiFive7VA], mx, IsWorst>;
    defm "" : LMULWriteResMX<"WriteVFNCvtFToFV", [SiFive7VA], mx, IsWorst>;
  }
}
761
762// 14. Vector Reduction Operations
// Integer reductions, costed per (LMUL, SEW) pair. Both the plain and the
// min/max reduction writes use the reduction cycle count as latency and as
// SiFive7VA occupancy.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    // The braces matter: a brace-less `let ... in` applies only to the single
    // statement that follows it, which would leave WriteVIRedMinMaxV_From
    // without the computed Latency/ResourceCycles.
    let Latency = Cycles, ResourceCycles = [Cycles] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFive7VA],
                                     mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SiFive7VA],
                                     mx, sew, IsWorstCase>;
    }
  }
}
774
// Widening integer reduction, costed per (LMUL, SEW) pair.
foreach mx = SchedMxListWRed in {
  foreach sew = SchedSEWSet<mx, 0, 1>.val in {
    defvar IsWorst = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    // Latency and SiFive7VA occupancy are both the reduction cycle count.
    let ResourceCycles = [RedCycles], Latency = RedCycles in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VA],
                                     mx, sew, IsWorst>;
    }
  }
}
784
// FP reductions, costed per (LMUL, SEW) pair. The WriteVFRedOV_From write
// takes its cost from a separate ordered-reduction cycle class.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, 1>.val in {
    defvar IsWorst = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    defvar UnordCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar OrdCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
    // Sum and min/max reductions share the same cost.
    let ResourceCycles = [UnordCycles], Latency = UnordCycles in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFive7VA],
                                     mx, sew, IsWorst>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SiFive7VA],
                                     mx, sew, IsWorst>;
    }
    let ResourceCycles = [OrdCycles], Latency = OrdCycles in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VA],
                                     mx, sew, IsWorst>;
    }
  }
}
801
// Widening FP reductions, costed per (LMUL, SEW) pair.
foreach mx = SchedMxListFWRed in {
  foreach sew = SchedSEWSet<mx, 1, 1>.val in {
    defvar IsWorst = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
    defvar UnordCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar OrdCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
    let ResourceCycles = [UnordCycles], Latency = UnordCycles in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VA],
                                     mx, sew, IsWorst>;
    }
    // WriteVFWRedOV_From uses the ordered-reduction cycle count instead.
    let ResourceCycles = [OrdCycles], Latency = OrdCycles in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VA],
                                     mx, sew, IsWorst>;
    }
  }
}
815
816// 15. Vector Mask Instructions
// Mask writes whose SiFive7VA occupancy comes from SiFive7GetCyclesVMask.
foreach mx = SchedMxList in {
  defvar IsWorst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar MaskCycles = SiFive7GetCyclesVMask<mx>.c;
  // Fixed latency of 4 for every LMUL.
  let ResourceCycles = [MaskCycles], Latency = 4 in {
    defm "" : LMULWriteResMX<"WriteVMALUV", [SiFive7VA], mx, IsWorst>;
    defm "" : LMULWriteResMX<"WriteVMPopV", [SiFive7VA], mx, IsWorst>;
    defm "" : LMULWriteResMX<"WriteVMFFSV", [SiFive7VA], mx, IsWorst>;
    defm "" : LMULWriteResMX<"WriteVMSFSV", [SiFive7VA], mx, IsWorst>;
  }
}
// WriteVMIotV and WriteVMIdxV use the default per-LMUL cycle count rather
// than the mask cycle table.
foreach mx = SchedMxList in {
  defvar IsWorst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  // Fixed latency of 4 for every LMUL.
  let ResourceCycles = [VACycles], Latency = 4 in {
    defm "" : LMULWriteResMX<"WriteVMIotV", [SiFive7VA], mx, IsWorst>;
    defm "" : LMULWriteResMX<"WriteVMIdxV", [SiFive7VA], mx, IsWorst>;
  }
}
835
836// 16. Vector Permutation Instructions
// These four move writes are plain WriteRes records (no per-LMUL expansion):
// latency 4 and a single cycle on SiFive7VA.
let Latency = 4 in
let ResourceCycles = [1] in {
  def : WriteRes<WriteVIMovVX, [SiFive7VA]>;
  def : WriteRes<WriteVIMovXV, [SiFive7VA]>;
  def : WriteRes<WriteVFMovVF, [SiFive7VA]>;
  def : WriteRes<WriteVFMovFV, [SiFive7VA]>;
}
// WriteVRGatherVX / WriteVRGatherVI: default per-LMUL occupancy with a fixed
// latency of 8.
foreach mx = SchedMxList in {
  defvar IsWorst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  let ResourceCycles = [VACycles], Latency = 8 in {
    defm "" : LMULWriteResMX<"WriteVRGatherVX",    [SiFive7VA], mx, IsWorst>;
    defm "" : LMULWriteResMX<"WriteVRGatherVI",    [SiFive7VA], mx, IsWorst>;
  }
}
851
// WriteVRGatherVV and WriteVCompressV are costed per (LMUL, SEW) pair using
// the one-per-element cycle count.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar IsWorst = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    defvar PerElemCycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
    // Latency is the occupancy plus 3 cycles.
    let ResourceCycles = [PerElemCycles], Latency = !add(PerElemCycles, 3) in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFive7VA], mx, sew, IsWorst>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SiFive7VA], mx, sew, IsWorst>;
    }
  }
}
862
// Slide writes: default per-LMUL occupancy with a fixed latency of 4.
foreach mx = SchedMxList in {
  defvar IsWorst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  let ResourceCycles = [VACycles], Latency = 4 in {
    defm "" : LMULWriteResMX<"WriteVISlideX",   [SiFive7VA], mx, IsWorst>;
    defm "" : LMULWriteResMX<"WriteVISlideI",   [SiFive7VA], mx, IsWorst>;
    defm "" : LMULWriteResMX<"WriteVISlide1X",  [SiFive7VA], mx, IsWorst>;
    defm "" : LMULWriteResMX<"WriteVFSlide1F",  [SiFive7VA], mx, IsWorst>;
  }
}
873
874// VMov*V is LMUL Aware
// WriteVMov{1,2,4,8}V: latency is 4 for all of them; SiFive7VA occupancy is
// 2, 4, 8 and 16 cycles respectively.
let Latency = 4 in {
  let ResourceCycles = [2] in
    def : WriteRes<WriteVMov1V,     [SiFive7VA]>;
  let ResourceCycles = [4] in
    def : WriteRes<WriteVMov2V,     [SiFive7VA]>;
  let ResourceCycles = [8] in
    def : WriteRes<WriteVMov4V,     [SiFive7VA]>;
  let ResourceCycles = [16] in
    def : WriteRes<WriteVMov8V,     [SiFive7VA]>;
}
883
884// Others
// WriteCSR is bound to SiFive7PipeB; WriteNop is given an empty resource list.
def : WriteRes<WriteCSR, [SiFive7PipeB]>;
def : WriteRes<WriteNop, []>;
// WriteRdVLENB also uses SiFive7PipeB, with an explicit latency of 3.
let Latency = 3 in {
  def : WriteRes<WriteRdVLENB, [SiFive7PipeB]>;
}

// The COPY instruction is scheduled as WriteIALU.
def : InstRW<[WriteIALU], (instrs COPY)>;
891
892//===----------------------------------------------------------------------===//
893
894// Bypass and advance
// Scalar operand reads. Each read either instantiates SiFive7AnyToGPRBypass
// (declared outside this excerpt) or is given a ReadAdvance of 0 cycles.
// Jump operands.
def : SiFive7AnyToGPRBypass<ReadJmp>;
def : SiFive7AnyToGPRBypass<ReadJalr>;
// CSR, store-data and memory-base operands.
def : ReadAdvance<ReadCSR, 0>;
def : ReadAdvance<ReadStoreData, 0>;
def : ReadAdvance<ReadMemBase, 0>;
// Integer ALU and shift operands.
def : SiFive7AnyToGPRBypass<ReadIALU>;
def : SiFive7AnyToGPRBypass<ReadIALU32>;
def : SiFive7AnyToGPRBypass<ReadShiftImm>;
def : SiFive7AnyToGPRBypass<ReadShiftImm32>;
def : SiFive7AnyToGPRBypass<ReadShiftReg>;
def : SiFive7AnyToGPRBypass<ReadShiftReg32>;
// Integer divide and multiply operands.
def : ReadAdvance<ReadIDiv, 0>;
def : ReadAdvance<ReadIDiv32, 0>;
def : ReadAdvance<ReadIMul, 0>;
def : ReadAdvance<ReadIMul32, 0>;
// Atomic operands.
def : ReadAdvance<ReadAtomicWA, 0>;
def : ReadAdvance<ReadAtomicWD, 0>;
def : ReadAdvance<ReadAtomicDA, 0>;
def : ReadAdvance<ReadAtomicDD, 0>;
def : ReadAdvance<ReadAtomicLDW, 0>;
def : ReadAdvance<ReadAtomicLDD, 0>;
def : ReadAdvance<ReadAtomicSTW, 0>;
def : ReadAdvance<ReadAtomicSTD, 0>;
// Scalar floating-point operands (16-, 32- and 64-bit variants).
def : ReadAdvance<ReadFStoreData, 0>;
def : ReadAdvance<ReadFMemBase, 0>;
def : ReadAdvance<ReadFAdd16, 0>;
def : ReadAdvance<ReadFAdd32, 0>;
def : ReadAdvance<ReadFAdd64, 0>;
def : ReadAdvance<ReadFMul16, 0>;
def : ReadAdvance<ReadFMA16, 0>;
def : ReadAdvance<ReadFMul32, 0>;
def : ReadAdvance<ReadFMul64, 0>;
def : ReadAdvance<ReadFMA32, 0>;
def : ReadAdvance<ReadFMA64, 0>;
def : ReadAdvance<ReadFDiv16, 0>;
def : ReadAdvance<ReadFDiv32, 0>;
def : ReadAdvance<ReadFDiv64, 0>;
def : ReadAdvance<ReadFSqrt16, 0>;
def : ReadAdvance<ReadFSqrt32, 0>;
def : ReadAdvance<ReadFSqrt64, 0>;
def : ReadAdvance<ReadFCmp16, 0>;
def : ReadAdvance<ReadFCmp32, 0>;
def : ReadAdvance<ReadFCmp64, 0>;
def : ReadAdvance<ReadFSGNJ16, 0>;
def : ReadAdvance<ReadFSGNJ32, 0>;
def : ReadAdvance<ReadFSGNJ64, 0>;
def : ReadAdvance<ReadFMinMax16, 0>;
def : ReadAdvance<ReadFMinMax32, 0>;
def : ReadAdvance<ReadFMinMax64, 0>;
// Scalar FP conversion operands.
def : ReadAdvance<ReadFCvtF16ToI32, 0>;
def : ReadAdvance<ReadFCvtF16ToI64, 0>;
def : ReadAdvance<ReadFCvtF32ToI32, 0>;
def : ReadAdvance<ReadFCvtF32ToI64, 0>;
def : ReadAdvance<ReadFCvtF64ToI32, 0>;
def : ReadAdvance<ReadFCvtF64ToI64, 0>;
def : ReadAdvance<ReadFCvtI32ToF16, 0>;
def : ReadAdvance<ReadFCvtI32ToF32, 0>;
def : ReadAdvance<ReadFCvtI32ToF64, 0>;
def : ReadAdvance<ReadFCvtI64ToF16, 0>;
def : ReadAdvance<ReadFCvtI64ToF32, 0>;
def : ReadAdvance<ReadFCvtI64ToF64, 0>;
def : ReadAdvance<ReadFCvtF32ToF64, 0>;
def : ReadAdvance<ReadFCvtF64ToF32, 0>;
def : ReadAdvance<ReadFCvtF16ToF32, 0>;
def : ReadAdvance<ReadFCvtF32ToF16, 0>;
def : ReadAdvance<ReadFCvtF16ToF64, 0>;
def : ReadAdvance<ReadFCvtF64ToF16, 0>;
// Scalar FP move and classify operands.
def : ReadAdvance<ReadFMovF16ToI16, 0>;
def : ReadAdvance<ReadFMovI16ToF16, 0>;
def : ReadAdvance<ReadFMovF32ToI32, 0>;
def : ReadAdvance<ReadFMovI32ToF32, 0>;
def : ReadAdvance<ReadFMovF64ToI64, 0>;
def : ReadAdvance<ReadFMovI64ToF64, 0>;
def : ReadAdvance<ReadFClass16, 0>;
def : ReadAdvance<ReadFClass32, 0>;
def : ReadAdvance<ReadFClass64, 0>;

// The SFB reads pass an explicit second template argument of 0.
// NOTE(review): its meaning is defined by SiFive7AnyToGPRBypass, which is not
// visible in this excerpt - confirm against the class definition.
def : SiFive7AnyToGPRBypass<ReadSFBJmp, 0>;
def : SiFive7AnyToGPRBypass<ReadSFBALU, 0>;
974
975// Bitmanip
// Every Bitmanip read below instantiates SiFive7AnyToGPRBypass except
// ReadCPOP / ReadCPOP32, which are given a ReadAdvance of 0 instead.
def : SiFive7AnyToGPRBypass<ReadRotateImm>;
def : SiFive7AnyToGPRBypass<ReadRotateImm32>;
def : SiFive7AnyToGPRBypass<ReadRotateReg>;
def : SiFive7AnyToGPRBypass<ReadRotateReg32>;
def : SiFive7AnyToGPRBypass<ReadCLZ>;
def : SiFive7AnyToGPRBypass<ReadCLZ32>;
def : SiFive7AnyToGPRBypass<ReadCTZ>;
def : SiFive7AnyToGPRBypass<ReadCTZ32>;
def : ReadAdvance<ReadCPOP, 0>;
def : ReadAdvance<ReadCPOP32, 0>;
def : SiFive7AnyToGPRBypass<ReadORCB>;
def : SiFive7AnyToGPRBypass<ReadREV8>;
def : SiFive7AnyToGPRBypass<ReadSHXADD>;
def : SiFive7AnyToGPRBypass<ReadSHXADD32>;
990
991// 6. Configuration-Setting Instructions
// Unlike most reads in this file, the two vsetvl reads use a ReadAdvance of 2.
def : ReadAdvance<ReadVSETVLI, 2>;
def : ReadAdvance<ReadVSETVL, 2>;
994
995// 7. Vector Loads and Stores
// All vector load/store operand reads get a ReadAdvance of 0. Reads expanded
// per LMUL go through the LMULReadAdvance multiclass; the rest are single
// ReadAdvance records.
def : ReadAdvance<ReadVLDX, 0>;
def : ReadAdvance<ReadVSTX, 0>;
defm "" : LMULReadAdvance<"ReadVSTEV", 0>;
defm "" : LMULReadAdvance<"ReadVSTM", 0>;
def : ReadAdvance<ReadVLDSX, 0>;
def : ReadAdvance<ReadVSTSX, 0>;
defm "" : LMULReadAdvance<"ReadVSTS8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS32V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS64V", 0>;
defm "" : LMULReadAdvance<"ReadVLDUXV", 0>;
defm "" : LMULReadAdvance<"ReadVLDOXV", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX8", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX16", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX32", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX64", 0>;
defm "" : LMULReadAdvance<"ReadVSTUXV", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX32V", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX64V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX8", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX16", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX32", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX64", 0>;
defm "" : LMULReadAdvance<"ReadVSTOXV", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX32V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX64V", 0>;
// LMUL Aware
// ReadVST{1,2,4,8}R are individual reads, so they use plain ReadAdvance.
def : ReadAdvance<ReadVST1R, 0>;
def : ReadAdvance<ReadVST2R, 0>;
def : ReadAdvance<ReadVST4R, 0>;
def : ReadAdvance<ReadVST8R, 0>;
1031
// 11. Vector Integer Arithmetic Instructions
// All vector integer arithmetic operand reads get a ReadAdvance of 0. The
// multiclass varies by read: LMULReadAdvance for most, LMULReadAdvanceW for
// the ReadVIW* and ReadVNShift* reads, and LMULSEWReadAdvance for the
// ReadVIDiv* reads.
defm : LMULReadAdvance<"ReadVIALUV", 0>;
defm : LMULReadAdvance<"ReadVIALUX", 0>;
defm : LMULReadAdvanceW<"ReadVIWALUV", 0>;
defm : LMULReadAdvanceW<"ReadVIWALUX", 0>;
defm : LMULReadAdvance<"ReadVExtV", 0>;
defm : LMULReadAdvance<"ReadVICALUV", 0>;
defm : LMULReadAdvance<"ReadVICALUX", 0>;
defm : LMULReadAdvance<"ReadVShiftV", 0>;
defm : LMULReadAdvance<"ReadVShiftX", 0>;
defm : LMULReadAdvanceW<"ReadVNShiftV", 0>;
defm : LMULReadAdvanceW<"ReadVNShiftX", 0>;
defm : LMULReadAdvance<"ReadVICmpV", 0>;
defm : LMULReadAdvance<"ReadVICmpX", 0>;
defm : LMULReadAdvance<"ReadVIMinMaxV", 0>;
defm : LMULReadAdvance<"ReadVIMinMaxX", 0>;
defm : LMULReadAdvance<"ReadVIMulV", 0>;
defm : LMULReadAdvance<"ReadVIMulX", 0>;
defm : LMULSEWReadAdvance<"ReadVIDivV", 0>;
defm : LMULSEWReadAdvance<"ReadVIDivX", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulV", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulX", 0>;
defm : LMULReadAdvance<"ReadVIMulAddV", 0>;
defm : LMULReadAdvance<"ReadVIMulAddX", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulAddV", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulAddX", 0>;
defm : LMULReadAdvance<"ReadVIMergeV", 0>;
defm : LMULReadAdvance<"ReadVIMergeX", 0>;
defm : LMULReadAdvance<"ReadVIMovV", 0>;
defm : LMULReadAdvance<"ReadVIMovX", 0>;
1062
// 12. Vector Fixed-Point Arithmetic Instructions
// All fixed-point operand reads get a ReadAdvance of 0; the ReadVNClip* reads
// use the LMULReadAdvanceW multiclass, the rest use LMULReadAdvance.
defm "" : LMULReadAdvance<"ReadVSALUV", 0>;
defm "" : LMULReadAdvance<"ReadVSALUX", 0>;
defm "" : LMULReadAdvance<"ReadVAALUV", 0>;
defm "" : LMULReadAdvance<"ReadVAALUX", 0>;
defm "" : LMULReadAdvance<"ReadVSMulV", 0>;
defm "" : LMULReadAdvance<"ReadVSMulX", 0>;
defm "" : LMULReadAdvance<"ReadVSShiftV", 0>;
defm "" : LMULReadAdvance<"ReadVSShiftX", 0>;
defm "" : LMULReadAdvanceW<"ReadVNClipV", 0>;
defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>;
1074
// 13. Vector Floating-Point Instructions
// All vector FP operand reads get a ReadAdvance of 0. Four multiclasses are
// used: LMULReadAdvance, LMULReadAdvanceW, LMULReadAdvanceFW, and
// LMULSEWReadAdvanceF (for the divide and square-root reads).
defm "" : LMULReadAdvance<"ReadVFALUV", 0>;
defm "" : LMULReadAdvance<"ReadVFALUF", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWALUV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWALUF", 0>;
defm "" : LMULReadAdvance<"ReadVFMulV", 0>;
defm "" : LMULReadAdvance<"ReadVFMulF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulF", 0>;
defm "" : LMULReadAdvance<"ReadVFMulAddV", 0>;
defm "" : LMULReadAdvance<"ReadVFMulAddF", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
defm "" : LMULReadAdvance<"ReadVFRecpV", 0>;
defm "" : LMULReadAdvance<"ReadVFMinMaxV", 0>;
defm "" : LMULReadAdvance<"ReadVFMinMaxF", 0>;
defm "" : LMULReadAdvance<"ReadVFSgnjV", 0>;
defm "" : LMULReadAdvance<"ReadVFSgnjF", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
defm "" : LMULReadAdvance<"ReadVFClassV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeF", 0>;
defm "" : LMULReadAdvance<"ReadVFMovF", 0>;
defm "" : LMULReadAdvance<"ReadVFCvtIToFV", 0>;
defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>;
// The widening/narrowing convert reads mix the W and FW multiclasses:
// ReadVFWCvtIToFV and ReadVFNCvtFToIV use W, the others use FW.
defm "" : LMULReadAdvanceW<"ReadVFWCvtIToFV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
1110
// 14. Vector Reduction Operations
// Reduction operand reads are plain (non-LMUL-expanded) ReadAdvance records,
// all with an advance of 0. Each reduction kind has a read and a matching
// "0"-suffixed read.
def : ReadAdvance<ReadVIRedV, 0>;
def : ReadAdvance<ReadVIRedV0, 0>;
def : ReadAdvance<ReadVIWRedV, 0>;
def : ReadAdvance<ReadVIWRedV0, 0>;
def : ReadAdvance<ReadVFRedV, 0>;
def : ReadAdvance<ReadVFRedV0, 0>;
def : ReadAdvance<ReadVFRedOV, 0>;
def : ReadAdvance<ReadVFRedOV0, 0>;
def : ReadAdvance<ReadVFWRedV, 0>;
def : ReadAdvance<ReadVFWRedV0, 0>;
def : ReadAdvance<ReadVFWRedOV, 0>;
def : ReadAdvance<ReadVFWRedOV0, 0>;
1124
// 15. Vector Mask Instructions
// Mask-instruction operand reads, expanded per LMUL, all with an advance of 0.
defm "" : LMULReadAdvance<"ReadVMALUV", 0>;
defm "" : LMULReadAdvance<"ReadVMPopV", 0>;
defm "" : LMULReadAdvance<"ReadVMFFSV", 0>;
defm "" : LMULReadAdvance<"ReadVMSFSV", 0>;
defm "" : LMULReadAdvance<"ReadVMIotV", 0>;
1131
// 16. Vector Permutation Instructions
// Permutation operand reads, all with an advance of 0. The move reads are
// plain ReadAdvance records; slide and gather VX/VI reads are expanded per
// LMUL; gather VV and compress reads are expanded per LMUL and SEW.
def : ReadAdvance<ReadVIMovVX, 0>;
def : ReadAdvance<ReadVIMovXV, 0>;
def : ReadAdvance<ReadVIMovXX, 0>;
def : ReadAdvance<ReadVFMovVF, 0>;
def : ReadAdvance<ReadVFMovFV, 0>;
def : ReadAdvance<ReadVFMovFX, 0>;
defm "" : LMULReadAdvance<"ReadVISlideV", 0>;
defm "" : LMULReadAdvance<"ReadVISlideX", 0>;
defm "" : LMULReadAdvance<"ReadVFSlideV", 0>;
defm "" : LMULReadAdvance<"ReadVFSlideF", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_data", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_index", 0>;
defm "" : LMULReadAdvance<"ReadVRGatherVX_data", 0>;
defm "" : LMULReadAdvance<"ReadVRGatherVX_index", 0>;
defm "" : LMULReadAdvance<"ReadVRGatherVI_data", 0>;
defm "" : LMULSEWReadAdvance<"ReadVCompressV", 0>;
// LMUL Aware
// ReadVMov{1,2,4,8}V are individual reads, so they use plain ReadAdvance.
def : ReadAdvance<ReadVMov1V, 0>;
def : ReadAdvance<ReadVMov2V, 0>;
def : ReadAdvance<ReadVMov4V, 0>;
def : ReadAdvance<ReadVMov8V, 0>;
1154
1155// Others
// The ReadVMask read gets a ReadAdvance of 0.
def : ReadAdvance<ReadVMask, 0>;
1157
1158//===----------------------------------------------------------------------===//
1159// Unsupported extensions
// Instantiate the UnsupportedSched* multiclasses (defined outside this file
// excerpt) for the Zbc, Zbs, Zbkb, Zbkx and Zfa extensions.
defm : UnsupportedSchedZbc;
defm : UnsupportedSchedZbs;
defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedZfa;
1165}
1166