//==- RISCVSchedSiFive7.td - SiFive7 Scheduling Definitions --*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//

/// Worst-case LMUL test.
/// c is 1 exactly when mx is the largest LMUL contained in MxList (as reported
/// by LargestLMUL); on the SiFive7 the largest LMUL is the worst case.
class SiFive7IsWorstCaseMX<string mx, list<string> MxList> {
  bit c = !eq(mx, LargestLMUL<MxList>.r);
}

/// Worst-case (LMUL, SEW) test.
/// c is 1 exactly when mx is the largest LMUL in MxList and sew is the smallest
/// SEW that SmallestSEW reports for mx. isF is forwarded to SmallestSEW
/// unchanged.
class SiFive7IsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
                               bit isF = 0> {
  defvar IsLargestLMUL = !eq(mx, LargestLMUL<MxList>.r);
  defvar IsSmallestSEW = !eq(sew, SmallestSEW<mx, isF>.r);
  bit c = !and(IsLargestLMUL, IsSmallestSEW);
}

/// Number of DLEN parts = (LMUL * VLEN) / DLEN.
/// Since DLEN = VLEN / 2, Num DLEN parts = 2 * LMUL.
/// Fractional LMULs (MF2, MF4, MF8) are rounded up to a single DLEN part.
class SiFive7GetCyclesDefault<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 2,
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 16,
    !eq(mx, "MF2") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

/// Cycles for narrowing operations: 4 * LMUL, with a floor of one cycle.
/// There is deliberately no M8 entry; every user in this file iterates
/// SchedMxListW (presumably a list without M8 -- confirm against its
/// definition).
class SiFive7GetCyclesNarrowing<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 4,
    !eq(mx, "M2") : 8,
    !eq(mx, "M4") : 16,
    !eq(mx, "MF2") : 2,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

/// Cycles for mask operations: 2 cycles at M8, 1 cycle for every other LMUL.
class SiFive7GetCyclesVMask<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 1,
    !eq(mx, "M2") : 1,
    !eq(mx, "M4") : 1,
    !eq(mx, "M8") : 2,
    !eq(mx, "MF2") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

/// VLDM and VSTM can't read/write more than 2 DLENs of data.
/// 2 DLENs when LMUL=8. 1 DLEN for all other LMULs.
class SiFive7GetMaskLoadStoreCycles<string mx> {
  int c = !cond(
    !eq(mx, "M8") : 2,
    true : 1
  );
}

// Cycles for nf=2 segmented loads and stores are calculated using the
// formula (2 * VLEN * LMUL) / DLEN = 4 * LMUL
// (fractional results are rounded up to one cycle).
class SiFive7GetCyclesSegmentedSeg2<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 4,
    !eq(mx, "M2") : 8,
    !eq(mx, "M4") : 16,
    !eq(mx, "M8") : 32,
    !eq(mx, "MF2") : 2,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

// Cycles for segmented loads and stores are calculated using the
// formula vl * ceil((SEW * nf) / DLEN), where SEW * nf is the segment size.
class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
  defvar VLEN = 512;
  // DLEN = VLEN / 2, derived the same way as in SiFive7GetReductionCycles
  // instead of repeating the literal 256.
  defvar DLEN = !div(VLEN, 2);
  // (VLEN * LMUL) / SEW
  defvar VLUpperBound = !cond(
    !eq(mx, "M1") : !div(VLEN, sew),
    !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
    !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
    !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
    !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
    !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
    !eq(mx, "MF8") : !div(!div(VLEN, 8), sew)
  );
  // We can calculate ceil(a/b) using (a + b - 1) / b.
  defvar a = !mul(sew, nf);
  defvar b = DLEN;
  int c = !mul(VLUpperBound, !div(!sub(!add(a, b), 1), b));
}

/// Cycles for operations that process one element per cycle.
class SiFive7GetCyclesOnePerElement<string mx, int sew> {
  // FIXME: On SiFive7, VLEN is 512. Although a user can request the compiler
  // to use a different VLEN, this model will not make scheduling decisions
  // based on the user specified VLEN.
  // c = ceil(VLEN / SEW) * LMUL
  // Note: c >= 1 since the smallest element count per register is
  // 512 / 64 = 8, and the largest division performed on it is in the MF8 case
  // with division by 8. Therefore, there is no need to ceil the result.
  // NOTE: despite its name, the VLEN field below holds 512 / SEW, i.e. the
  // number of elements in one vector register, not the register width.
  int VLEN = !div(512, sew);
  int c = !cond(
    !eq(mx, "M1") : VLEN,
    !eq(mx, "M2") : !mul(VLEN, 2),
    !eq(mx, "M4") : !mul(VLEN, 4),
    !eq(mx, "M8") : !mul(VLEN, 8),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF8") : !div(VLEN, 8)
  );
}

/// Per-SEW multiplier used by the vector divide and square-root entries of
/// this model. Only SEW 8, 16, 32 and 64 are handled.
class SiFive7GetDivOrSqrtFactor<int sew> {
  int c = !cond(
    // TODO: Add SchedSEWSetFP upstream and remove the SEW=8 case.
    !eq(sew, 8) : 15,
    !eq(sew, 16) : 15,
    !eq(sew, 32) : 28,
    !eq(sew, 64) : 57
  );
}

/// Cycles for reductions take approximately VL*SEW/DLEN + 5(4 + log(DLEN/SEW))
/// cycles.
class SiFive7GetReductionCycles<string mx, int sew> {
  // VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
  // VLUpperBound=(VLEN*LMUL)/SEW.
  defvar VLEN = 512;
  defvar DLEN = !div(VLEN, 2);
  // 2*LMUL (with a floor of 1 for fractional LMULs) is exactly the table kept
  // in SiFive7GetCyclesDefault, so reuse it rather than spelling it out again.
  defvar TwoTimesLMUL = SiFive7GetCyclesDefault<mx>.c;
  int c = !add(
    TwoTimesLMUL,
    !mul(5, !add(4, !logtwo(!div(DLEN, sew))))
  );
}

/// Cycles for ordered reductions take approximately 6*VL cycles
class SiFive7GetOrderedReductionCycles<string mx, int sew> {
  defvar VLEN = 512;
  // (VLEN * LMUL) / SEW
  defvar VLUpperBound = !cond(
    !eq(mx, "M1") : !div(VLEN, sew),
    !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
    !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
    !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
    !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
    !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
    !eq(mx, "MF8") : !div(!div(VLEN, 8), sew)
  );
  int c = !mul(6, VLUpperBound);
}

/// ReadAdvance of `cycles` (2 by default) for `read`, restricted to values
/// produced by the scalar integer, bit-manipulation, multiply/divide and load
/// writes listed below.
class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
    : ReadAdvance<read, cycles, [WriteIALU, WriteIALU32,
                                 WriteShiftImm, WriteShiftImm32,
                                 WriteShiftReg, WriteShiftReg32,
                                 WriteSHXADD, WriteSHXADD32,
                                 WriteRotateImm, WriteRotateImm32,
                                 WriteRotateReg, WriteRotateReg32,
                                 WriteSingleBit, WriteSingleBitImm,
                                 WriteBEXT, WriteBEXTI,
                                 WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
                                 WriteCPOP, WriteCPOP32,
                                 WriteREV8, WriteORCB, WriteSFB,
                                 WriteIMul, WriteIMul32,
                                 WriteIDiv, WriteIDiv32,
                                 WriteLDB, WriteLDH, WriteLDW, WriteLDD]>;

// SiFive7 machine model for scheduling and other instruction cost heuristics.
def SiFive7Model : SchedMachineModel {
  let MicroOpBufferSize = 0; // Explicitly set to zero since SiFive7 is in-order.
  let IssueWidth = 2;        // 2 micro-ops are dispatched per cycle.
  let LoadLatency = 3;
  let MispredictPenalty = 3;
  let CompleteModel = 0;
  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
                             HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
                             HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
                             HasStdExtZkr];
}

// The SiFive7 microarchitecture has three pipelines: A, B, V.
// Pipe A can handle memory, integer alu and vector operations.
// Pipe B can handle integer alu, control flow, integer multiply and divide,
// and floating point computation.
// The V pipeline is modeled by the VCQ, VA, VL, and VS resources.
let SchedModel = SiFive7Model in {
let BufferSize = 0 in {
def SiFive7PipeA       : ProcResource<1>;
def SiFive7PipeB       : ProcResource<1>;
def SiFive7IDiv        : ProcResource<1>; // Int Division
def SiFive7FDiv        : ProcResource<1>; // FP Division/Sqrt
def SiFive7VA          : ProcResource<1>; // Arithmetic sequencer
def SiFive7VL          : ProcResource<1>; // Load sequencer
def SiFive7VS          : ProcResource<1>; // Store sequencer
// The VCQ accepts instructions from the A Pipe and holds them until the
// vector unit is ready to dequeue them. The unit dequeues up to one instruction
// per cycle, in order, as soon as the sequencer for that type of instruction is
// available. This resource is meant to be used for 1 cycle by all vector
// instructions, to model that only one vector instruction may be dequeued at a
// time. The actual dequeueing into the sequencer is modeled by the VA, VL, and
// VS sequencer resources below. Each of them will only accept a single
// instruction at a time and remain busy for the number of cycles associated
// with that instruction.
def SiFive7VCQ         : ProcResource<1>; // Vector Command Queue
}

// Group used by writes that can issue on either pipe A or pipe B.
def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;

// Branching
let Latency = 3 in {
def : WriteRes<WriteJmp, [SiFive7PipeB]>;
def : WriteRes<WriteJal, [SiFive7PipeB]>;
def : WriteRes<WriteJalr, [SiFive7PipeB]>;
}

// Short forward branch: occupies both pipes and counts as two micro-ops.
def : WriteRes<WriteSFB, [SiFive7PipeA, SiFive7PipeB]> {
  let Latency = 3;
  let NumMicroOps = 2;
}

// Integer arithmetic and logic
let Latency = 3 in {
def : WriteRes<WriteIALU, [SiFive7PipeAB]>;
def : WriteRes<WriteIALU32, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftImm, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftImm32, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftReg, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftReg32, [SiFive7PipeAB]>;
}

// Integer multiplication
let Latency = 3 in {
def : WriteRes<WriteIMul, [SiFive7PipeB]>;
def : WriteRes<WriteIMul32, [SiFive7PipeB]>;
}

// Integer division
// Pipe B is released after one cycle; the divider stays busy for
// Latency - 1 cycles.
def : WriteRes<WriteIDiv, [SiFive7PipeB, SiFive7IDiv]> {
  let Latency = 66;
  let ReleaseAtCycles = [1, 65];
}
def : WriteRes<WriteIDiv32, [SiFive7PipeB, SiFive7IDiv]> {
  let Latency = 34;
  let ReleaseAtCycles = [1, 33];
}

// Bitmanip
let Latency = 3 in {
// Rotates are in the late-B ALU.
def : WriteRes<WriteRotateImm, [SiFive7PipeB]>;
def : WriteRes<WriteRotateImm32, [SiFive7PipeB]>;
def : WriteRes<WriteRotateReg, [SiFive7PipeB]>;
def : WriteRes<WriteRotateReg32, [SiFive7PipeB]>;

// clz[w]/ctz[w] are in the late-B ALU.
def : WriteRes<WriteCLZ, [SiFive7PipeB]>;
def : WriteRes<WriteCLZ32, [SiFive7PipeB]>;
def : WriteRes<WriteCTZ, [SiFive7PipeB]>;
def : WriteRes<WriteCTZ32, [SiFive7PipeB]>;

// cpop[w] look exactly like multiply.
def : WriteRes<WriteCPOP, [SiFive7PipeB]>;
def : WriteRes<WriteCPOP32, [SiFive7PipeB]>;

// orc.b is in the late-B ALU.
def : WriteRes<WriteORCB, [SiFive7PipeB]>;

// rev8 is in the late-A and late-B ALUs.
def : WriteRes<WriteREV8, [SiFive7PipeAB]>;

// shNadd[.uw] is on the early-B and late-B ALUs.
def : WriteRes<WriteSHXADD, [SiFive7PipeB]>;
def : WriteRes<WriteSHXADD32, [SiFive7PipeB]>;
}

// Single-bit instructions
// BEXT[I] instruction is available on all ALUs and the other instructions
// are only available on the SiFive7B pipe.
let Latency = 3 in {
def : WriteRes<WriteSingleBit, [SiFive7PipeB]>;
def : WriteRes<WriteSingleBitImm, [SiFive7PipeB]>;
def : WriteRes<WriteBEXT, [SiFive7PipeAB]>;
def : WriteRes<WriteBEXTI, [SiFive7PipeAB]>;
}

// Memory
// Stores keep WriteRes' default latency; all memory operations use pipe A.
def : WriteRes<WriteSTB, [SiFive7PipeA]>;
def : WriteRes<WriteSTH, [SiFive7PipeA]>;
def : WriteRes<WriteSTW, [SiFive7PipeA]>;
def : WriteRes<WriteSTD, [SiFive7PipeA]>;
def : WriteRes<WriteFST16, [SiFive7PipeA]>;
def : WriteRes<WriteFST32, [SiFive7PipeA]>;
def : WriteRes<WriteFST64, [SiFive7PipeA]>;

let Latency = 3 in {
def : WriteRes<WriteLDB, [SiFive7PipeA]>;
def : WriteRes<WriteLDH, [SiFive7PipeA]>;
def : WriteRes<WriteLDW, [SiFive7PipeA]>;
def : WriteRes<WriteLDD, [SiFive7PipeA]>;
}

let Latency = 2 in {
def : WriteRes<WriteFLD16, [SiFive7PipeA]>;
def : WriteRes<WriteFLD32, [SiFive7PipeA]>;
def : WriteRes<WriteFLD64, [SiFive7PipeA]>;
}

// Atomic memory
def : WriteRes<WriteAtomicSTW, [SiFive7PipeA]>;
def : WriteRes<WriteAtomicSTD, [SiFive7PipeA]>;

let Latency = 3 in {
def : WriteRes<WriteAtomicW, [SiFive7PipeA]>;
def : WriteRes<WriteAtomicD, [SiFive7PipeA]>;
def : WriteRes<WriteAtomicLDW, [SiFive7PipeA]>;
def : WriteRes<WriteAtomicLDD, [SiFive7PipeA]>;
}

// Half precision.
// Add, multiply and fused multiply-add.
let Latency = 5 in {
def : WriteRes<WriteFAdd16, [SiFive7PipeB]>;
def : WriteRes<WriteFMul16, [SiFive7PipeB]>;
def : WriteRes<WriteFMA16, [SiFive7PipeB]>;
}
// Sign injection and min/max.
let Latency = 3 in {
def : WriteRes<WriteFSGNJ16, [SiFive7PipeB]>;
def : WriteRes<WriteFMinMax16, [SiFive7PipeB]>;
}
// Divide and square root: pipe B is released after one cycle while the FP
// divider stays busy for Latency - 1 cycles.
let Latency = 14, ReleaseAtCycles = [1, 13] in {
def : WriteRes<WriteFDiv16, [SiFive7PipeB, SiFive7FDiv]>;
def : WriteRes<WriteFSqrt16, [SiFive7PipeB, SiFive7FDiv]>;
}

// Single precision.
let Latency = 5 in {
def : WriteRes<WriteFAdd32, [SiFive7PipeB]>;
def : WriteRes<WriteFMul32, [SiFive7PipeB]>;
def : WriteRes<WriteFMA32, [SiFive7PipeB]>;
}
let Latency = 3 in {
def : WriteRes<WriteFSGNJ32, [SiFive7PipeB]>;
def : WriteRes<WriteFMinMax32, [SiFive7PipeB]>;
}
let Latency = 27, ReleaseAtCycles = [1, 26] in {
def : WriteRes<WriteFDiv32, [SiFive7PipeB, SiFive7FDiv]>;
def : WriteRes<WriteFSqrt32, [SiFive7PipeB, SiFive7FDiv]>;
}

// Double precision
let Latency = 7 in {
def : WriteRes<WriteFAdd64, [SiFive7PipeB]>;
def : WriteRes<WriteFMul64, [SiFive7PipeB]>;
def : WriteRes<WriteFMA64, [SiFive7PipeB]>;
}
let Latency = 3 in {
def : WriteRes<WriteFSGNJ64, [SiFive7PipeB]>;
def : WriteRes<WriteFMinMax64, [SiFive7PipeB]>;
}
let Latency = 56, ReleaseAtCycles = [1, 55] in {
def : WriteRes<WriteFDiv64, [SiFive7PipeB, SiFive7FDiv]>;
def : WriteRes<WriteFSqrt64, [SiFive7PipeB, SiFive7FDiv]>;
}

// Conversions
// Everything in this group is a 3-cycle operation on pipe B.
let Latency = 3 in {
// Integer to floating point.
def : WriteRes<WriteFCvtI32ToF16, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtI32ToF32, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtI32ToF64, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtI64ToF16, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtI64ToF32, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtI64ToF64, [SiFive7PipeB]>;
// Half-precision source.
def : WriteRes<WriteFCvtF16ToI32, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF16ToI64, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF16ToF32, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF16ToF64, [SiFive7PipeB]>;
// Single-precision source.
def : WriteRes<WriteFCvtF32ToI32, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF32ToI64, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF32ToF16, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF32ToF64, [SiFive7PipeB]>;
// Double-precision source.
def : WriteRes<WriteFCvtF64ToI32, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF64ToI64, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF64ToF16, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF64ToF32, [SiFive7PipeB]>;

// Classify and compare.
def : WriteRes<WriteFClass16, [SiFive7PipeB]>;
def : WriteRes<WriteFClass32, [SiFive7PipeB]>;
def : WriteRes<WriteFClass64, [SiFive7PipeB]>;
def : WriteRes<WriteFCmp16, [SiFive7PipeB]>;
def : WriteRes<WriteFCmp32, [SiFive7PipeB]>;
def : WriteRes<WriteFCmp64, [SiFive7PipeB]>;
// Moves between the integer and floating-point register files.
def : WriteRes<WriteFMovI16ToF16, [SiFive7PipeB]>;
def : WriteRes<WriteFMovF16ToI16, [SiFive7PipeB]>;
def : WriteRes<WriteFMovI32ToF32, [SiFive7PipeB]>;
def : WriteRes<WriteFMovF32ToI32, [SiFive7PipeB]>;
def : WriteRes<WriteFMovI64ToF64, [SiFive7PipeB]>;
def : WriteRes<WriteFMovF64ToI64, [SiFive7PipeB]>;
}

// 6. Configuration-Setting Instructions
let Latency = 3 in {
def : WriteRes<WriteVSETVLI, [SiFive7PipeA]>;
def : WriteRes<WriteVSETIVLI, [SiFive7PipeA]>;
def : WriteRes<WriteVSETVL, [SiFive7PipeA]>;
}

// 7. Vector Loads and Stores
// Unit-stride loads and stores can operate at the full bandwidth of the memory
// pipe. The memory pipe is DLEN bits wide on x280.
// Unit-stride (and fault-only-first) loads and unit-stride stores. Each takes
// the VCQ for one cycle, then its sequencer from cycle 1 for 2 * LMUL cycles.
foreach mx = SchedMxList in {
  defvar SeqCycles = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SeqCycles)] in {
    let Latency = 4 in {
      defm "" : LMULWriteResMX<"WriteVLDE", [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
      defm "" : LMULWriteResMX<"WriteVLDFF", [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
    }
    let Latency = 1 in
    defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
  }
}

// Mask loads and stores: same shape as above, but the sequencer occupancy
// comes from SiFive7GetMaskLoadStoreCycles.
foreach mx = SchedMxList in {
  defvar SeqCycles = SiFive7GetMaskLoadStoreCycles<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SeqCycles)] in {
    let Latency = 4 in
    defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
    let Latency = 1 in
    defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
  }
}

// Strided loads and stores operate at one element per cycle and should be
// scheduled accordingly. Indexed loads and stores operate at one element per
// cycle, and they stall the machine until all addresses have been generated,
// so they cannot be scheduled. Indexed and strided loads and stores have LMUL
// specific suffixes, but since SEW is already encoded in the name of the
// resource, we do not need to use LMULSEWXXX constructors. However, we do
// use the SEW from the name to determine the number of Cycles.

// This predicate is true when the rs2 operand of vlse or vsse is x0, false
// otherwise.
def VLDSX0Pred : MCSchedPredicate<CheckRegOperand<3, X0>>;

// SEW=8 strided and indexed accesses, for every LMUL in SchedMxList.
foreach mx = SchedMxList in {
  defvar X0StrideCycles = SiFive7GetCyclesDefault<mx>.c;
  defvar ElemCycles = SiFive7GetCyclesOnePerElement<mx, 8>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  // Strided load: the first latency/acquire/release triple uses the
  // unit-stride cycle count and the second the one-element-per-cycle count;
  // presumably the first applies when VLDSX0Pred holds -- confirm against
  // LMULWriteResMXVariant. Kept outside the shared `let` below so that its
  // own resource cycles are not overridden.
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
                                       4, [0, 1], [1, !add(1, X0StrideCycles)], !add(3, ElemCycles),
                                       [0, 1], [1, !add(1, ElemCycles)], mx, WorstCase>;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, ElemCycles)] in {
    // Indexed loads.
    let Latency = !add(3, ElemCycles) in {
      defm "" : LMULWriteResMX<"WriteVLDUX8", [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
      defm "" : LMULWriteResMX<"WriteVLDOX8", [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
    }
    // Strided and indexed stores.
    let Latency = 1 in {
      defm "" : LMULWriteResMX<"WriteVSTS8", [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
      defm "" : LMULWriteResMX<"WriteVSTUX8", [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
      defm "" : LMULWriteResMX<"WriteVSTOX8", [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
    }
  }
}
// TODO: The MxLists need to be filtered by EEW. We only need to support
// LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8
// since LMUL >= 16/64.
// SEW=16 strided and indexed accesses. MF8 is not in the list; IsWorstCase is
// still evaluated against the full SchedMxList.
foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
  defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
                                       4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
                                       [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
  let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
  }
  let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVSTS16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTUX16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
  }
}
// SEW=32 strided and indexed accesses; the list starts at MF2.
foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
  defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
                                       4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
                                       [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
  let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
  }
  let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVSTS32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTUX32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
  }
}
// SEW=64 strided and indexed accesses; only the integral LMULs are listed.
foreach mx = ["M1", "M2", "M4", "M8"] in {
  defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
                                       4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
                                       [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
  let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
  }
  let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVSTS64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTUX64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTOX64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
  }
}

// VLD*R is LMUL aware
// The sequencer occupancy for 1, 2, 4 and 8 registers (2, 4, 8, 16) is written
// out literally; it matches the M1..M8 values of SiFive7GetCyclesDefault.
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in
  def : WriteRes<WriteVLD1R, [SiFive7VCQ, SiFive7VL]>;
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in
  def : WriteRes<WriteVLD2R, [SiFive7VCQ, SiFive7VL]>;
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in
  def : WriteRes<WriteVLD4R, [SiFive7VCQ, SiFive7VL]>;
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in
  def : WriteRes<WriteVLD8R, [SiFive7VCQ, SiFive7VL]>;
// VST*R is LMUL aware
let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in
  def : WriteRes<WriteVST1R, [SiFive7VCQ, SiFive7VS]>;
let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in
  def : WriteRes<WriteVST2R, [SiFive7VCQ, SiFive7VS]>;
let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in
  def : WriteRes<WriteVST4R, [SiFive7VCQ, SiFive7VS]>;
let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in
  def : WriteRes<WriteVST8R, [SiFive7VCQ, SiFive7VS]>;

// Segmented Loads and Stores
// Unit-stride segmented loads and stores are effectively converted into strided
// segment loads and stores. Strided segment loads and stores operate at up to
// one segment per cycle if the segment fits within one aligned memory beat.
// Indexed segment loads and stores operate at the same rate as strided ones,
// but they stall the machine until all addresses have been generated.
//
// Unit-stride segment accesses: nf=2 takes its cycle count from
// SiFive7GetCyclesSegmentedSeg2, nf=3..8 from SiFive7GetCyclesSegmented.
foreach mx = SchedMxList in {
  foreach eew = [8, 16, 32, 64] in {
    defvar Cycles = SiFive7GetCyclesSegmentedSeg2<mx>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
    // Does not chain so set latency high
    let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
      defm "" : LMULWriteResMX<"WriteVLSEG2e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVLSEGFF2e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
    }
    let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
    defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    foreach nf=3-8 in {
      // These defvars shadow the nf=2 ones declared in the enclosing loop.
      defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
      defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
      // Does not chain so set latency high
      let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
        defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
      }
      let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
      defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    }
  }
}
// Strided and indexed segment accesses: SiFive7GetCyclesSegmented is used for
// every nf from 2 to 8.
foreach mx = SchedMxList in {
  foreach nf=2-8 in {
    foreach eew = [8, 16, 32, 64] in {
      defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
      defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
      // Does not chain so set latency high
      let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
        defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
      }
      let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
        defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
      }
    }
  }
}

// 11. Vector Integer Arithmetic Instructions
// Every entry takes the VCQ for one cycle and then the arithmetic sequencer
// (VA) from cycle 1 for 2 * LMUL cycles.
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVIALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMovV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMovX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMovI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
  // Mask results can't chain.
  let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVICmpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICmpX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICmpI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
// Integer extension.
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
// Integer divide, keyed by both LMUL and SEW.
// Cycles = SiFive7GetDivOrSqrtFactor(sew) * (one-per-element cycles / 4), and
// the same value is used as the latency.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
                         !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
    }
  }
}

// Widening
foreach mx = SchedMxListW in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVIWALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
// Narrowing
// Occupancy comes from SiFive7GetCyclesNarrowing rather than the default table.
foreach mx = SchedMxListW in {
  defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVNShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVNShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVNShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}

// 12. Vector Fixed-Point Arithmetic Instructions
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVSALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVAALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVAALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
// Narrowing
foreach mx = SchedMxListW in {
  defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVNClipV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVNClipX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVNClipI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}

// 13. Vector Floating-Point Instructions
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  // Arithmetic, reciprocal estimate and int<->FP conversions: latency 8.
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVFALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFALUF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFMulF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFMulAddF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFRecpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
  // Sign injection, min/max, classify, merge and move: latency 4.
  let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVFSgnjV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFSgnjF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFMinMaxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFMinMaxF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFClassV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFMovV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
  // Mask results can't chain.
  let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
// FP square root and divide, keyed by both LMUL and SEW; the cycle formula is
// the same as for the integer divide above.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
                         !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
    }
  }
}

// Widening
foreach mx = SchedMxListW in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
foreach mx = SchedMxListFW in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
  let
Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 773 defm "" : LMULWriteResMX<"WriteVFWALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 774 defm "" : LMULWriteResMX<"WriteVFWMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 775 defm "" : LMULWriteResMX<"WriteVFWMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 776 defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 777 defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 778 defm "" : LMULWriteResMX<"WriteVFWMulAddF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 779 defm "" : LMULWriteResMX<"WriteVFWMulF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 780 defm "" : LMULWriteResMX<"WriteVFWALUF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 781 } 782} 783// Narrowing 784foreach mx = SchedMxListW in { 785 defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c; 786 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c; 787 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 788 defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 789 } 790} 791foreach mx = SchedMxListFW in { 792 defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c; 793 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c; 794 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 795 defm "" : LMULWriteResMX<"WriteVFNCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 796 defm "" : LMULWriteResMX<"WriteVFNCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 797 } 798} 799 800// 14. 
// Vector Reduction Operations
//
// For each reduction write the latency equals the cycle count fetched for the
// (mx, sew) pair; SiFive7VA is acquired at cycle 1 and released at
// 1 + that count, SiFive7VCQ is acquired at cycle 0 and released at cycle 1.

// WriteVIRedV_From / WriteVIRedMinMaxV_From.
foreach mx = SchedMxList in
foreach sew = SchedSEWSet<mx>.val in {
  defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
  defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
  let Latency = Cycles, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  foreach wr = ["WriteVIRedV_From", "WriteVIRedMinMaxV_From"] in
    defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VCQ, SiFive7VA], mx, sew,
                                   IsWorstCase>;
}

// WriteVIWRedV_From.
foreach mx = SchedMxListWRed in
foreach sew = SchedSEWSet<mx, 0, 1>.val in {
  defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
  defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
  let Latency = Cycles, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From",
                                   [SiFive7VCQ, SiFive7VA],
                                   mx, sew, IsWorstCase>;
  }
}

// WriteVFRedV_From / WriteVFRedMinMaxV_From use SiFive7GetReductionCycles;
// WriteVFRedOV_From uses SiFive7GetOrderedReductionCycles.
foreach mx = SchedMxListF in
foreach sew = SchedSEWSet<mx, 1>.val in {
  defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
  defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
  defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
  let Latency = RedCycles, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, RedCycles)] in
  foreach wr = ["WriteVFRedV_From", "WriteVFRedMinMaxV_From"] in
    defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VCQ, SiFive7VA], mx, sew,
                                   IsWorstCase>;
  let Latency = OrdRedCycles, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in {
    defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From",
                                   [SiFive7VCQ, SiFive7VA],
                                   mx, sew, IsWorstCase>;
  }
}

// WriteVFWRedV_From (SiFive7GetReductionCycles) and WriteVFWRedOV_From
// (SiFive7GetOrderedReductionCycles).
foreach mx = SchedMxListFWRed in
foreach sew = SchedSEWSet<mx, 1, 1>.val in {
  defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
  defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
  defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
  let Latency = RedCycles, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, RedCycles)] in {
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From",
                                   [SiFive7VCQ, SiFive7VA],
                                   mx, sew, IsWorstCase>;
  }
  let Latency = OrdRedCycles, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in {
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From",
                                   [SiFive7VCQ, SiFive7VA],
                                   mx, sew, IsWorstCase>;
  }
}

// 15. Vector Mask Instructions
// Cycle count comes from SiFive7GetCyclesVMask for this first group.
foreach mx = SchedMxList in {
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar Cycles = SiFive7GetCyclesVMask<mx>.c;
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  foreach wr = ["WriteVMALUV", "WriteVMPopV", "WriteVMFFSV", "WriteVMSFSV"] in
    defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
// WriteVMIotV / WriteVMIdxV take their cycle count from
// SiFive7GetCyclesDefault instead.
foreach mx = SchedMxList in {
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  foreach wr = ["WriteVMIotV", "WriteVMIdxV"] in
    defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}

// 16.
// Vector Permutation Instructions
// Moves with a single fixed cost: latency 4, SiFive7VA released at cycle 2.
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, 2] in
foreach wr = [WriteVIMovVX, WriteVIMovXV, WriteVFMovVF, WriteVFMovFV] in
  def : WriteRes<wr, [SiFive7VCQ, SiFive7VA]>;

foreach mx = SchedMxList in {
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  let Latency = 8, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  foreach wr = ["WriteVRGatherVX", "WriteVRGatherVI"] in
    defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}

// WriteVRGatherVV / WriteVCompressV: cycle count comes from
// SiFive7GetCyclesOnePerElement and the latency is that count plus 3.
foreach mx = SchedMxList in
foreach sew = SchedSEWSet<mx>.val in {
  defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
  defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
  let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  foreach wr = ["WriteVRGatherVV", "WriteVCompressV"] in
    defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VCQ, SiFive7VA], mx, sew,
                                   IsWorstCase>;
}

foreach mx = SchedMxList in {
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  foreach wr = ["WriteVISlideX", "WriteVISlideI",
                "WriteVISlide1X", "WriteVFSlide1F"] in
    defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}

// VMov*V is LMUL Aware
// WriteVMov<N>V releases SiFive7VA at cycle 1 + 2 * N (3, 5, 9, 17).
foreach nr = [1, 2, 4, 8] in
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, !mul(2, nr))] in
  def : WriteRes<!cast<SchedWrite>("WriteVMov" # nr # "V"),
                 [SiFive7VCQ, SiFive7VA]>;

// Others
def : WriteRes<WriteCSR, [SiFive7PipeB]>;
def : WriteRes<WriteNop, []>;
let Latency = 3 in {
  def : WriteRes<WriteRdVLENB, [SiFive7PipeB]>;
}

def : InstRW<[WriteIALU], (instrs COPY)>;

//===----------------------------------------------------------------------===//

// Bypass and advance
foreach rd = [ReadJmp, ReadJalr] in
  def : SiFive7AnyToGPRBypass<rd>;
foreach rd = [ReadCSR, ReadStoreData, ReadMemBase] in
  def : ReadAdvance<rd, 0>;
foreach rd = [ReadIALU, ReadIALU32,
              ReadShiftImm, ReadShiftImm32,
              ReadShiftReg, ReadShiftReg32] in
  def : SiFive7AnyToGPRBypass<rd>;
foreach rd = [ReadIDiv, ReadIDiv32, ReadIMul, ReadIMul32,
              ReadAtomicWA, ReadAtomicWD, ReadAtomicDA, ReadAtomicDD,
              ReadAtomicLDW, ReadAtomicLDD, ReadAtomicSTW, ReadAtomicSTD,
              ReadFStoreData, ReadFMemBase,
              ReadFAdd16, ReadFAdd32, ReadFAdd64] in
  def : ReadAdvance<rd, 0>;
// Scalar floating-point reads: each one gets a ReadAdvance of 0 cycles.
foreach rd = [ReadFMul16, ReadFMA16, ReadFMA16Addend,
              ReadFMul32, ReadFMul64,
              ReadFMA32, ReadFMA32Addend,
              ReadFMA64, ReadFMA64Addend,
              ReadFDiv16, ReadFDiv32, ReadFDiv64,
              ReadFSqrt16, ReadFSqrt32, ReadFSqrt64,
              ReadFCmp16, ReadFCmp32, ReadFCmp64,
              ReadFSGNJ16, ReadFSGNJ32, ReadFSGNJ64,
              ReadFMinMax16, ReadFMinMax32, ReadFMinMax64,
              ReadFCvtF16ToI32, ReadFCvtF16ToI64,
              ReadFCvtF32ToI32, ReadFCvtF32ToI64,
              ReadFCvtF64ToI32, ReadFCvtF64ToI64,
              ReadFCvtI32ToF16, ReadFCvtI32ToF32, ReadFCvtI32ToF64,
              ReadFCvtI64ToF16, ReadFCvtI64ToF32, ReadFCvtI64ToF64,
              ReadFCvtF32ToF64, ReadFCvtF64ToF32,
              ReadFCvtF16ToF32, ReadFCvtF32ToF16,
              ReadFCvtF16ToF64, ReadFCvtF64ToF16,
              ReadFMovF16ToI16, ReadFMovI16ToF16,
              ReadFMovF32ToI32, ReadFMovI32ToF32,
              ReadFMovF64ToI64, ReadFMovI64ToF64,
              ReadFClass16] in
  def : ReadAdvance<rd, 0>;
foreach rd = [ReadFClass32, ReadFClass64] in
  def : ReadAdvance<rd, 0>;

// These two pass an explicit second argument of 0 to SiFive7AnyToGPRBypass.
foreach rd = [ReadSFBJmp, ReadSFBALU] in
  def : SiFive7AnyToGPRBypass<rd, 0>;

// Bitmanip
foreach rd = [ReadRotateImm, ReadRotateImm32,
              ReadRotateReg, ReadRotateReg32,
              ReadCLZ, ReadCLZ32, ReadCTZ, ReadCTZ32] in
  def : SiFive7AnyToGPRBypass<rd>;
// ReadCPOP / ReadCPOP32 get a plain ReadAdvance of 0 rather than the bypass.
foreach rd = [ReadCPOP, ReadCPOP32] in
  def : ReadAdvance<rd, 0>;
foreach rd = [ReadORCB, ReadREV8, ReadSHXADD, ReadSHXADD32] in
  def : SiFive7AnyToGPRBypass<rd>;
// Single-bit instructions
foreach rd = [ReadSingleBit, ReadSingleBitImm] in
  def : SiFive7AnyToGPRBypass<rd>;

// 6. Configuration-Setting Instructions
foreach rd = [ReadVSETVLI, ReadVSETVL] in
  def : ReadAdvance<rd, 2>;

// 7.
// Vector Loads and Stores
// Every read in this section gets a ReadAdvance of 0 cycles.
foreach rd = [ReadVLDX, ReadVSTX] in
  def : ReadAdvance<rd, 0>;
foreach rd = ["ReadVSTEV", "ReadVSTM"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = [ReadVLDSX, ReadVSTSX] in
  def : ReadAdvance<rd, 0>;
foreach rd = ["ReadVSTS8V", "ReadVSTS16V", "ReadVSTS32V", "ReadVSTS64V",
              "ReadVLDUXV", "ReadVLDOXV",
              "ReadVSTUX8", "ReadVSTUX16", "ReadVSTUX32", "ReadVSTUX64",
              "ReadVSTUXV",
              "ReadVSTUX8V", "ReadVSTUX16V", "ReadVSTUX32V", "ReadVSTUX64V",
              "ReadVSTOX8", "ReadVSTOX16", "ReadVSTOX32", "ReadVSTOX64",
              "ReadVSTOXV",
              "ReadVSTOX8V", "ReadVSTOX16V", "ReadVSTOX32V",
              "ReadVSTOX64V"] in
  defm "" : LMULReadAdvance<rd, 0>;
// LMUL Aware
foreach rd = [ReadVST1R, ReadVST2R, ReadVST4R, ReadVST8R] in
  def : ReadAdvance<rd, 0>;

// 12.
// Vector Integer Arithmetic Instructions
// NOTE(review): the WriteRes part of this file labels Fixed-Point as section
// 12 and Floating-Point as section 13, one lower than the headers used in
// this ReadAdvance part -- confirm which numbering is intended.
//
// Every read in this section gets a ReadAdvance of 0 cycles; the runs below
// keep the original declaration order.
foreach rd = ["ReadVIALUV", "ReadVIALUX"] in
  defm : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVIWALUV", "ReadVIWALUX"] in
  defm : LMULReadAdvanceW<rd, 0>;
foreach rd = ["ReadVExtV", "ReadVICALUV", "ReadVICALUX",
              "ReadVShiftV", "ReadVShiftX"] in
  defm : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVNShiftV", "ReadVNShiftX"] in
  defm : LMULReadAdvanceW<rd, 0>;
foreach rd = ["ReadVICmpV", "ReadVICmpX",
              "ReadVIMinMaxV", "ReadVIMinMaxX",
              "ReadVIMulV", "ReadVIMulX"] in
  defm : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVIDivV", "ReadVIDivX"] in
  defm : LMULSEWReadAdvance<rd, 0>;
foreach rd = ["ReadVIWMulV", "ReadVIWMulX"] in
  defm : LMULReadAdvanceW<rd, 0>;
foreach rd = ["ReadVIMulAddV", "ReadVIMulAddX"] in
  defm : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVIWMulAddV", "ReadVIWMulAddX"] in
  defm : LMULReadAdvanceW<rd, 0>;
foreach rd = ["ReadVIMergeV", "ReadVIMergeX", "ReadVIMovV", "ReadVIMovX"] in
  defm : LMULReadAdvance<rd, 0>;

// 13. Vector Fixed-Point Arithmetic Instructions
foreach rd = ["ReadVSALUV", "ReadVSALUX",
              "ReadVAALUV", "ReadVAALUX",
              "ReadVSMulV", "ReadVSMulX",
              "ReadVSShiftV", "ReadVSShiftX"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVNClipV", "ReadVNClipX"] in
  defm "" : LMULReadAdvanceW<rd, 0>;

// 14.
// Vector Floating-Point Instructions
// Every read in this section gets a ReadAdvance of 0 cycles; the runs below
// keep the original declaration order.
foreach rd = ["ReadVFALUV", "ReadVFALUF"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVFWALUV", "ReadVFWALUF"] in
  defm "" : LMULReadAdvanceFW<rd, 0>;
foreach rd = ["ReadVFMulV", "ReadVFMulF"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVFDivV", "ReadVFDivF"] in
  defm "" : LMULSEWReadAdvanceF<rd, 0>;
foreach rd = ["ReadVFWMulV", "ReadVFWMulF"] in
  defm "" : LMULReadAdvanceFW<rd, 0>;
foreach rd = ["ReadVFMulAddV", "ReadVFMulAddF"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVFWMulAddV", "ReadVFWMulAddF"] in
  defm "" : LMULReadAdvanceFW<rd, 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
foreach rd = ["ReadVFRecpV",
              "ReadVFMinMaxV", "ReadVFMinMaxF",
              "ReadVFSgnjV", "ReadVFSgnjF",
              "ReadVFCmpV", "ReadVFCmpF",
              "ReadVFClassV",
              "ReadVFMergeV", "ReadVFMergeF",
              "ReadVFMovF",
              "ReadVFCvtIToFV", "ReadVFCvtFToIV"] in
  defm "" : LMULReadAdvance<rd, 0>;
defm "" : LMULReadAdvanceW<"ReadVFWCvtIToFV", 0>;
foreach rd = ["ReadVFWCvtFToIV", "ReadVFWCvtFToFV", "ReadVFNCvtIToFV"] in
  defm "" : LMULReadAdvanceFW<rd, 0>;
defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFNCvtFToFV", 0>;

// 15.
// Vector Reduction Operations
// Every read from here to the end of the ReadAdvance list gets an advance of
// 0 cycles.
foreach rd = [ReadVIRedV, ReadVIRedV0,
              ReadVIWRedV, ReadVIWRedV0,
              ReadVFRedV, ReadVFRedV0,
              ReadVFRedOV, ReadVFRedOV0,
              ReadVFWRedV, ReadVFWRedV0,
              ReadVFWRedOV, ReadVFWRedOV0] in
  def : ReadAdvance<rd, 0>;

// 16. Vector Mask Instructions
foreach rd = ["ReadVMALUV", "ReadVMPopV", "ReadVMFFSV", "ReadVMSFSV",
              "ReadVMIotV"] in
  defm "" : LMULReadAdvance<rd, 0>;

// 17. Vector Permutation Instructions
foreach rd = [ReadVIMovVX, ReadVIMovXV, ReadVIMovXX,
              ReadVFMovVF, ReadVFMovFV, ReadVFMovFX] in
  def : ReadAdvance<rd, 0>;
foreach rd = ["ReadVISlideV", "ReadVISlideX",
              "ReadVFSlideV", "ReadVFSlideF"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVRGatherVV_data", "ReadVRGatherVV_index"] in
  defm "" : LMULSEWReadAdvance<rd, 0>;
foreach rd = ["ReadVRGatherVX_data", "ReadVRGatherVX_index",
              "ReadVRGatherVI_data"] in
  defm "" : LMULReadAdvance<rd, 0>;
defm "" : LMULSEWReadAdvance<"ReadVCompressV", 0>;
// LMUL Aware
foreach rd = [ReadVMov1V, ReadVMov2V, ReadVMov4V, ReadVMov8V] in
  def : ReadAdvance<rd, 0>;

// Others
foreach rd = [ReadVMask, ReadVMergeOp_WorstCase] in
  def : ReadAdvance<rd, 0>;
// One ReadVMergeOp_<mx> per LMUL, plus one ReadVMergeOp_<mx>_E<sew> per
// (LMUL, SEW) pair; the names are built by string paste and looked up with
// !cast.
foreach mx = SchedMxList in {
  defvar MergeOp = "ReadVMergeOp_" # mx;
  def : ReadAdvance<!cast<SchedRead>(MergeOp), 0>;
  foreach sew = SchedSEWSet<mx>.val in {
    def : ReadAdvance<!cast<SchedRead>(MergeOp # "_E" # sew), 0>;
  }
}

//===----------------------------------------------------------------------===//
// Unsupported extensions
defm : UnsupportedSchedZbc;
defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedZfa;
}