//=- AArch64SchedAmpere1B.td - Ampere-1B scheduling def -----*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for the Ampere Computing Ampere-1B to
// support instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

// The Ampere-1B core is an out-of-order micro-architecture.  The front
// end has branch prediction, with a 10-cycle recovery time from a
// mispredicted branch.  Instructions coming out of the front end are
// decoded into internal micro-ops (uops).

// Top-level machine model record; every definition inside the
// `let SchedModel = Ampere1BModel in { ... }` region below is attached to it.
def Ampere1BModel : SchedMachineModel {
  let IssueWidth = 12;            // Maximum micro-ops dispatch rate.
  let MicroOpBufferSize = 192;    // micro-op re-order buffer size
  let LoadLatency = 3;            // Optimistic load latency
  let MispredictPenalty = 10;     // Branch mispredict penalty
  let LoopMicroOpBufferSize = 32; // Instruction queue size
  let CompleteModel = 1;

  // Concatenation of the SVE, SME and PA "unsupported" predicate lists.
  list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
                                                    SMEUnsupported.F,
                                                    PAUnsupported.F);
}

let SchedModel = Ampere1BModel in {

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available on Ampere-1B.

def Ampere1BUnitA  : ProcResource<2>; // integer single-cycle, branch, and flags r/w
def Ampere1BUnitB  : ProcResource<2>; // integer single-cycle, and complex shifts
def Ampere1BUnitBS : ProcResource<1>; // integer multi-cycle
def Ampere1BUnitL  : ProcResource<2>; // load
def Ampere1BUnitS  : ProcResource<2>; // store address calculation
def Ampere1BUnitX  : ProcResource<1>; // FP and vector operations, and flag write
def Ampere1BUnitY  : ProcResource<1>; // FP and vector operations, and crypto
def Ampere1BUnitZ  : ProcResource<1>; // FP store data and FP-to-integer moves

// Resource groups: "AB" covers units A and B, "XY" covers units X and Y.
def Ampere1BUnitAB : ProcResGroup<[Ampere1BUnitA, Ampere1BUnitB]>;
def Ampere1BUnitXY : ProcResGroup<[Ampere1BUnitX, Ampere1BUnitY]>;

//===----------------------------------------------------------------------===//
// Define customized scheduler read/write types specific to the Ampere-1B.
//
// Naming convention used by the records below:
//   Ampere1BWrite_<N>cyc_<units>
// where <N> is the Latency and <units> spells out the resource list, e.g.
// "1BS_1AB" is one UnitBS entry followed by one UnitAB entry.

def Ampere1BWrite_1cyc_1A : SchedWriteRes<[Ampere1BUnitA]> {
  let Latency = 1;
  let NumMicroOps = 1;
}

def Ampere1BWrite_1cyc_2A : SchedWriteRes<[Ampere1BUnitA, Ampere1BUnitA]> {
  let Latency = 1;
  let NumMicroOps = 2;
}

def Ampere1BWrite_1cyc_1B : SchedWriteRes<[Ampere1BUnitB]> {
  let Latency = 1;
  let NumMicroOps = 1;
}

def Ampere1BWrite_1cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]> {
  let Latency = 1;
  let NumMicroOps = 1;
}

def Ampere1BWrite_1cyc_1BS_1B : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitB]> {
  let Latency = 1;
  let NumMicroOps = 2;
}

def Ampere1BWrite_1cyc_1AB : SchedWriteRes<[Ampere1BUnitAB]> {
  let Latency = 1;
  let NumMicroOps = 1;
}

def Ampere1BWrite_1cyc_1AB_1A : SchedWriteRes<[Ampere1BUnitAB, Ampere1BUnitA]> {
  let Latency = 1;
  let NumMicroOps = 2;
}

def Ampere1BWrite_1cyc_1L : SchedWriteRes<[Ampere1BUnitL]> {
  let Latency = 1;
  let NumMicroOps = 1;
}

def Ampere1BWrite_1cyc_1S : SchedWriteRes<[Ampere1BUnitS]> {
  let Latency = 1;
  let NumMicroOps = 1;
}

def Ampere1BWrite_1cyc_2S : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS]> {
  let Latency = 1;
  let NumMicroOps = 2;
}

def Ampere1BWrite_2cyc_1Y : SchedWriteRes<[Ampere1BUnitY]> {
  let Latency = 2;
  let NumMicroOps = 1;
}

def Ampere1BWrite_2cyc_2AB : SchedWriteRes<[Ampere1BUnitAB, Ampere1BUnitAB]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

def Ampere1BWrite_2cyc_1B_1AB : SchedWriteRes<[Ampere1BUnitB, Ampere1BUnitAB]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

def Ampere1BWrite_2cyc_1B_1S : SchedWriteRes<[Ampere1BUnitB, Ampere1BUnitS]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

def Ampere1BWrite_2cyc_1B_1S_1AB : SchedWriteRes<[Ampere1BUnitB,
                                                  Ampere1BUnitS,
                                                  Ampere1BUnitAB]> {
  let Latency = 2;
  let NumMicroOps = 3;
}
// Scheduler write types, continued (2-cycle through 6-cycle latencies).
// Each record's name encodes its Latency ("<N>cyc") and its resource list.

def Ampere1BWrite_2cyc_1S_2Z : SchedWriteRes<[Ampere1BUnitS,
                                              Ampere1BUnitZ,
                                              Ampere1BUnitZ]> {
  let Latency = 2;
  let NumMicroOps = 3;
}

def Ampere1BWrite_2cyc_1XY : SchedWriteRes<[Ampere1BUnitXY]> {
  let Latency = 2;
  let NumMicroOps = 1;
}

def Ampere1BWrite_2cyc_1S_1Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitZ]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

def Ampere1BWrite_3cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]> {
  let Latency = 3;
  let NumMicroOps = 1;
}

def Ampere1BWrite_3cyc_1L : SchedWriteRes<[Ampere1BUnitL]> {
  let Latency = 3;
  let NumMicroOps = 1;
}

def Ampere1BWrite_3cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 3;
  let NumMicroOps = 1;
}

def Ampere1BWrite_3cyc_1XY : SchedWriteRes<[Ampere1BUnitXY]> {
  let Latency = 3;
  let NumMicroOps = 1;
}

def Ampere1BWrite_3cyc_1Z : SchedWriteRes<[Ampere1BUnitZ]> {
  let Latency = 3;
  let NumMicroOps = 1;
}

def Ampere1BWrite_3cyc_1S_1Z : SchedWriteRes<[Ampere1BUnitS,
                                              Ampere1BUnitZ]> {
  let Latency = 3;
  let NumMicroOps = 2;
}

def Ampere1BWrite_3cyc_1S_2Z : SchedWriteRes<[Ampere1BUnitS,
                                              Ampere1BUnitZ, Ampere1BUnitZ]> {
  let Latency = 3;
  let NumMicroOps = 3;
}

def Ampere1BWrite_3cyc_2S_2Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS,
                                              Ampere1BUnitZ, Ampere1BUnitZ]> {
  let Latency = 3;
  let NumMicroOps = 4;
}

def Ampere1BWrite_4cyc_1BS_1AB : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitAB]> {
  let Latency = 4;
  let NumMicroOps = 2;
}

def Ampere1BWrite_4cyc_1L : SchedWriteRes<[Ampere1BUnitL]> {
  let Latency = 4;
  let NumMicroOps = 1;
}

def Ampere1BWrite_4cyc_2L : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL]> {
  let Latency = 4;
  let NumMicroOps = 2;
}

def Ampere1BWrite_4cyc_1L_1B : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitB]> {
  let Latency = 4;
  let NumMicroOps = 2;
}

def Ampere1BWrite_4cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 4;
  let NumMicroOps = 1;
}

def Ampere1BWrite_4cyc_1XY : SchedWriteRes<[Ampere1BUnitXY]> {
  let Latency = 4;
  let NumMicroOps = 1;
}

def Ampere1BWrite_4cyc_2XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 4;
  let NumMicroOps = 2;
}

def Ampere1BWrite_5cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]> {
  let Latency = 5;
  let NumMicroOps = 1;
}

def Ampere1BWrite_4cyc_1XY_1S_1Z : SchedWriteRes<[Ampere1BUnitXY,
                                                  Ampere1BUnitS,
                                                  Ampere1BUnitZ]> {
  let Latency = 4;
  let NumMicroOps = 3;
}

def Ampere1BWrite_4cyc_3S_3Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS,
                                              Ampere1BUnitS, Ampere1BUnitZ,
                                              Ampere1BUnitZ, Ampere1BUnitZ]> {
  let Latency = 4;
  let NumMicroOps = 6;
}

def Ampere1BWrite_5cyc_4S_4Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS,
                                              Ampere1BUnitS, Ampere1BUnitS,
                                              Ampere1BUnitZ, Ampere1BUnitZ,
                                              Ampere1BUnitZ, Ampere1BUnitZ]> {
  let Latency = 5;
  let NumMicroOps = 8;
}

def Ampere1BWrite_5cyc_1L_1BS : SchedWriteRes<[Ampere1BUnitL,
                                               Ampere1BUnitBS]> {
  let Latency = 5;
  let NumMicroOps = 2;
}

def Ampere1BWrite_5cyc_3L : SchedWriteRes<[Ampere1BUnitL,
                                           Ampere1BUnitL,
                                           Ampere1BUnitL]> {
  let Latency = 5;
  let NumMicroOps = 3;
}

def Ampere1BWrite_5cyc_4L : SchedWriteRes<[Ampere1BUnitL,
                                           Ampere1BUnitL,
                                           Ampere1BUnitL,
                                           Ampere1BUnitL]> {
  let Latency = 5;
  let NumMicroOps = 4;
}

def Ampere1BWrite_5cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 5;
  let NumMicroOps = 1;
}

def Ampere1BWrite_5cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY,
                                                  Ampere1BUnitS, Ampere1BUnitS,
                                                  Ampere1BUnitZ, Ampere1BUnitZ]> {
  let Latency = 5;
  let NumMicroOps = 6;
}

def Ampere1BWrite_6cyc_1BS_1A : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitA]> {
  let Latency = 6;
  let NumMicroOps = 2;
}

def Ampere1BWrite_6cyc_1BS_2A : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitA,
                                               Ampere1BUnitA]> {
  let Latency = 6;
  let NumMicroOps = 3;
}

def Ampere1BWrite_6cyc_1L_1XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitXY]> {
  let Latency = 6;
  let NumMicroOps = 2;
}

def Ampere1BWrite_6cyc_2L_2XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                                               Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 6;
  let NumMicroOps = 4;
}

// NOTE(review): a single UnitX resource is listed but NumMicroOps is 2; the
// other single-resource records in this file use NumMicroOps = 1. Confirm
// whether 2 is intentional.
def Ampere1BWrite_6cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 6;
  let NumMicroOps = 2;
}

def Ampere1BWrite_6cyc_2XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 6;
  let NumMicroOps = 2;
}

def Ampere1BWrite_6cyc_3XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY,
                                            Ampere1BUnitXY]> {
  let Latency = 6;
  let NumMicroOps = 3;
}

def Ampere1BWrite_6cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY,
                                                  Ampere1BUnitS, Ampere1BUnitS,
                                                  Ampere1BUnitZ, Ampere1BUnitZ]> {
  let Latency = 6;
  let NumMicroOps = 6;
}

// Scheduler write types, continued (6-cycle and 7-cycle multi-resource forms).

def Ampere1BWrite_6cyc_3XY_3S_3Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, Ampere1BUnitXY,
                                                  Ampere1BUnitS, Ampere1BUnitS, Ampere1BUnitS,
                                                  Ampere1BUnitZ, Ampere1BUnitZ, Ampere1BUnitZ]> {
  let Latency = 6;
  let NumMicroOps = 9;
}

def Ampere1BWrite_7cyc_1BS_1XY : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitXY]> {
  let Latency = 7;
  let NumMicroOps = 2;
}

def Ampere1BWrite_7cyc_1XY_1Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitZ]> {
  let Latency = 7;
  let NumMicroOps = 2;
}

def Ampere1BWrite_7cyc_1X_1Z : SchedWriteRes<[Ampere1BUnitX, Ampere1BUnitZ]> {
  let Latency = 7;
  let NumMicroOps = 2;
}

def Ampere1BWrite_7cyc_3L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                                               Ampere1BUnitL, Ampere1BUnitXY,
                                               Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 7;
  let NumMicroOps = 6;
}

def Ampere1BWrite_7cyc_4L_4XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                                               Ampere1BUnitL, Ampere1BUnitL,
                                               Ampere1BUnitXY, Ampere1BUnitXY,
                                               Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 7;
  let NumMicroOps = 8;
}
// Scheduler write types, continued (7-cycle through 63-cycle latencies),
// followed by the predicate-selected arithmetic variants.

def Ampere1BWrite_7cyc_4XY_4S_4Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY,
                                                  Ampere1BUnitXY, Ampere1BUnitXY,
                                                  Ampere1BUnitS, Ampere1BUnitS,
                                                  Ampere1BUnitS, Ampere1BUnitS,
                                                  Ampere1BUnitZ, Ampere1BUnitZ,
                                                  Ampere1BUnitZ, Ampere1BUnitZ]> {
  let Latency = 7;
  let NumMicroOps = 12;
}

def Ampere1BWrite_8cyc_1BS_1L : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitL]> {
  let Latency = 8;
  let NumMicroOps = 2;
}

def Ampere1BWrite_8cyc_1BS_1XY : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitXY]> {
  let Latency = 8;
  let NumMicroOps = 2;
}

def Ampere1BWrite_8cyc_2L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                                               Ampere1BUnitXY, Ampere1BUnitXY,
                                               Ampere1BUnitXY]> {
  let Latency = 8;
  let NumMicroOps = 5;
}

def Ampere1BWrite_8cyc_3L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                                               Ampere1BUnitL, Ampere1BUnitXY,
                                               Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 8;
  let NumMicroOps = 6;
}

def Ampere1BWrite_8cyc_4L_4XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                                               Ampere1BUnitL, Ampere1BUnitL,
                                               Ampere1BUnitXY, Ampere1BUnitXY,
                                               Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 8;
  let NumMicroOps = 8;
}

def Ampere1BWrite_8cyc_2XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 8;
  let NumMicroOps = 2;
}

def Ampere1BWrite_8cyc_4XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY,
                                            Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 8;
  let NumMicroOps = 4;
}

def Ampere1BWrite_9cyc_6XY_4S_4Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY,
                                                  Ampere1BUnitXY, Ampere1BUnitXY,
                                                  Ampere1BUnitXY, Ampere1BUnitXY,
                                                  Ampere1BUnitS, Ampere1BUnitS,
                                                  Ampere1BUnitS, Ampere1BUnitS,
                                                  Ampere1BUnitZ, Ampere1BUnitZ,
                                                  Ampere1BUnitZ, Ampere1BUnitZ]> {
  let Latency = 9;
  let NumMicroOps = 14;
}

def Ampere1BWrite_9cyc_1A_1BS_1X : SchedWriteRes<[Ampere1BUnitA, Ampere1BUnitBS, Ampere1BUnitX]> {
  let Latency = 9;
  let NumMicroOps = 3;
}

def Ampere1BWrite_9cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1BUnitA, Ampere1BUnitBS, Ampere1BUnitXY]> {
  let Latency = 9;
  let NumMicroOps = 3;
}

def Ampere1BWrite_9cyc_3L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                                               Ampere1BUnitL, Ampere1BUnitXY,
                                               Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 9;
  let NumMicroOps = 6;
}

def Ampere1BWrite_9cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 9;
  let NumMicroOps = 1;
}

def Ampere1BWrite_9cyc_3XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 9;
  let NumMicroOps = 3;
}

// NOTE(review): the name says 8XY and NumMicroOps is 12 (4 + 8), but only four
// UnitXY entries are listed. Confirm whether four more XY resources were
// intended.
def Ampere1BWrite_10cyc_4L_8XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                                                Ampere1BUnitL, Ampere1BUnitL,
                                                Ampere1BUnitXY, Ampere1BUnitXY,
                                                Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 10;
  let NumMicroOps = 12;
}

def Ampere1BWrite_11cyc_1BS_2XY : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 11;
  let NumMicroOps = 3;
}

// NOTE(review): same mismatch as Ampere1BWrite_10cyc_4L_8XY: four UnitXY
// entries listed against "8XY" in the name and NumMicroOps = 12.
def Ampere1BWrite_11cyc_4L_8XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                                                Ampere1BUnitL, Ampere1BUnitL,
                                                Ampere1BUnitXY, Ampere1BUnitXY,
                                                Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 11;
  let NumMicroOps = 12;
}

def Ampere1BWrite_12cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 12;
  let NumMicroOps = 1;
}

def Ampere1BWrite_13cyc_1BS_1X : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitX]> {
  let Latency = 13;
  let NumMicroOps = 2;
}

def Ampere1BWrite_17cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 17;
  let NumMicroOps = 1;
}

// Latency is 19 to match the "19cyc" in the record name; it previously read 13,
// which looks copied from Ampere1BWrite_13cyc_1BS_1X. The coarse WriteID64
// (64-bit divide) mapping in this file also uses 19 cycles.
def Ampere1BWrite_19cyc_2BS_1X : SchedWriteRes<[Ampere1BUnitBS,
                                                Ampere1BUnitBS,
                                                Ampere1BUnitX]> {
  let Latency = 19;
  let NumMicroOps = 3;
}

def Ampere1BWrite_19cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 19;
  let NumMicroOps = 1;
}

def Ampere1BWrite_21cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 21;
  let NumMicroOps = 1;
}

def Ampere1BWrite_33cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 33;
  let NumMicroOps = 1;
}

def Ampere1BWrite_39cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 39;
  let NumMicroOps = 1;
}

def Ampere1BWrite_63cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 63;
  let NumMicroOps = 1;
}

// For basic arithmetic, we have more flexibility for short shifts (LSL shift <= 4),
// which are a single uop, and for extended registers, which have full flexibility
// across Unit A or B for both uops.
// NoSchedPred is the fall-through entry, used when neither earlier predicate
// matches.
def Ampere1BWrite_Arith : SchedWriteVariant<[
                                SchedVar<RegExtendedPred, [Ampere1BWrite_2cyc_2AB]>,
                                SchedVar<IsCheapLSL,      [Ampere1BWrite_1cyc_1AB]>,
                                SchedVar<NoSchedPred,     [Ampere1BWrite_2cyc_1B_1AB]>]>;

// Flag-setting counterpart; currently selects the same writes as
// Ampere1BWrite_Arith for every predicate.
def Ampere1BWrite_ArithFlagsetting : SchedWriteVariant<[
                                SchedVar<RegExtendedPred, [Ampere1BWrite_2cyc_2AB]>,
                                SchedVar<IsCheapLSL,      [Ampere1BWrite_1cyc_1AB]>,
                                SchedVar<NoSchedPred,     [Ampere1BWrite_2cyc_1B_1AB]>]>;

//===----------------------------------------------------------------------===//
// Map the target-defined scheduler read/write resources and latencies for Ampere-1B.
// This provides a coarse model, which is then specialised below.
// Coarse mapping of the target-defined SchedWrite types onto Ampere-1B
// resources. Entries without an explicit `let` use the WriteRes defaults.

def : WriteRes<WriteImm, [Ampere1BUnitAB]>; // MOVN, MOVZ
def : WriteRes<WriteI, [Ampere1BUnitAB]>; // ALU
def : WriteRes<WriteISReg, [Ampere1BUnitB, Ampere1BUnitAB]> {
  let Latency = 2;
  let NumMicroOps = 2;
} // ALU of Shifted-Reg
def : WriteRes<WriteIEReg, [Ampere1BUnitAB, Ampere1BUnitAB]> {
  let Latency = 2;
  let NumMicroOps = 2;
} // ALU of Extended-Reg
def : WriteRes<WriteExtr, [Ampere1BUnitB]>; // EXTR shifts a reg pair
def : WriteRes<WriteIS, [Ampere1BUnitB]>; // Shift/Scale
def : WriteRes<WriteID32, [Ampere1BUnitBS, Ampere1BUnitX]> {
  let Latency = 13;
} // 32-bit Divide
def : WriteRes<WriteID64, [Ampere1BUnitBS, Ampere1BUnitX]> {
  let Latency = 19;
} // 64-bit Divide
def : WriteRes<WriteIM32, [Ampere1BUnitBS]> {
  let Latency = 3;
} // 32-bit Multiply
def : WriteRes<WriteIM64, [Ampere1BUnitBS, Ampere1BUnitAB]> {
  let Latency = 3;
} // 64-bit Multiply
def : WriteRes<WriteBr, [Ampere1BUnitA]>;
def : WriteRes<WriteBrReg, [Ampere1BUnitA, Ampere1BUnitA]>;
def : WriteRes<WriteLD, [Ampere1BUnitL]> {
  let Latency = 3;
} // Load from base addr plus immediate offset
def : WriteRes<WriteST, [Ampere1BUnitS]> {
  let Latency = 1;
} // Store to base addr plus immediate offset
// NOTE(review): two UnitS entries but NumMicroOps = 1; WriteSTIdx below uses
// the same resource list with NumMicroOps = 2. Confirm this is intended.
def : WriteRes<WriteSTP, [Ampere1BUnitS, Ampere1BUnitS]> {
  let Latency = 1;
  let NumMicroOps = 1;
} // Store a register pair.
def : WriteRes<WriteAdr, [Ampere1BUnitAB]>;
// NOTE(review): this load mapping names UnitS (documented above as "store
// address calculation") rather than UnitL. Confirm against the core's
// documentation.
def : WriteRes<WriteLDIdx, [Ampere1BUnitAB, Ampere1BUnitS]> {
  let Latency = 3;
  let NumMicroOps = 1;
} // Load from a register index (maybe scaled).
def : WriteRes<WriteSTIdx, [Ampere1BUnitS, Ampere1BUnitS]> {
  let Latency = 1;
  let NumMicroOps = 2;
} // Store to a register index (maybe scaled).
def : WriteRes<WriteF, [Ampere1BUnitXY]> {
  let Latency = 2;
} // General floating-point ops.
def : WriteRes<WriteFCmp, [Ampere1BUnitX]> {
  let Latency = 3;
} // Floating-point compare.
def : WriteRes<WriteFCvt, [Ampere1BUnitXY]> {
  let Latency = 3;
} // Float conversion.
def : WriteRes<WriteFCopy, [Ampere1BUnitXY]> {
} // Float-int register copy.
def : WriteRes<WriteFImm, [Ampere1BUnitXY]> {
  let Latency = 2;
} // Floating-point immediate.
def : WriteRes<WriteFMul, [Ampere1BUnitXY]> {
  let Latency = 4;
} // Floating-point multiply.
// Floating-point division.
def : WriteRes<WriteFDiv, [Ampere1BUnitXY]> { let Latency = 19; }
// 64bit Vector D ops.
def : WriteRes<WriteVd, [Ampere1BUnitXY]> { let Latency = 3; }
// 128bit Vector Q ops.
def : WriteRes<WriteVq, [Ampere1BUnitXY]> { let Latency = 3; }
// Vector loads.
def : WriteRes<WriteVLD, [Ampere1BUnitL, Ampere1BUnitL]> { let Latency = 4; }
// Vector stores.
def : WriteRes<WriteVST, [Ampere1BUnitS, Ampere1BUnitZ]> { let Latency = 2; }

// Atomics are marked unsupported in this model.
def : WriteRes<WriteAtomic, []> {
  let Unsupported = 1;
}

// System, barrier and hint writes consume no modelled resource.
def : WriteRes<WriteSys, []> {
  let Latency = 1;
}
def : WriteRes<WriteBarrier, []> {
  let Latency = 1;
}
def : WriteRes<WriteHint, []> {
  let Latency = 1;
}

// The second register of a load-pair: LDP,LDPSW,LDNP,LDXP,LDAXP
def : WriteRes<WriteLDHi, []> { let Latency = 3; }

// Forwarding logic.
// Every generic read is given an advance of 0, except ReadIMA, which is
// advanced by 1 cycle when its producer is WriteIM32 or WriteIM64.
def : ReadAdvance<ReadI,       0>;
def : ReadAdvance<ReadISReg,   0>;
def : ReadAdvance<ReadIEReg,   0>;
def : ReadAdvance<ReadIM,      0>;
def : ReadAdvance<ReadIMA,     1, [WriteIM32, WriteIM64]>;
def : ReadAdvance<ReadID,      0>;
def : ReadAdvance<ReadExtrHi,  0>;
def : ReadAdvance<ReadST,      0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD,     0>;

//===----------------------------------------------------------------------===//
// Specialising the scheduling model further for Ampere-1B.

def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs COPY)>;

// Branch instructions
def : InstRW<[Ampere1BWrite_1cyc_1A], (instrs Bcc, BL, RET)>;
def : InstRW<[Ampere1BWrite_1cyc_1A], (instrs CBZW, CBZX, CBNZW, CBNZX,
                                               TBZW, TBZX, TBNZW, TBNZX)>;
def : InstRW<[Ampere1BWrite_1cyc_2A], (instrs BLR)>;

// Common Short Sequence Compression (CSSC)
def : InstRW<[Ampere1BWrite_1cyc_1AB], (instregex "^ABS[WX]")>;
def : InstRW<[Ampere1BWrite_3cyc_1BS], (instregex "^CNT[WX]")>;
def : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "^CTZ[WX]")>;
def : InstRW<[Ampere1BWrite_1cyc_1AB_1A], (instregex "^[SU](MAX|MIN)[WX]")>;

// Cryptography instructions
// -- AES encryption/decryption
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^AES[DE]")>;
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^AESI?MC")>;
// -- Polynomial multiplication
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^PMUL", "^PMULL")>;
// -- SHA-256 hash
def : InstRW<[Ampere1BWrite_4cyc_1X], (instregex "^SHA256(H|H2)")>;
// -- SHA-256 schedule update
def : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA256SU[01]")>;
// -- SHA-3 instructions
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^BCAX", "^EOR3", "^RAX1", "^XAR")>;
// -- SHA-512 hash
def : InstRW<[Ampere1BWrite_4cyc_1X], (instregex "^SHA512(H|H2)")>;
// -- SHA-512 schedule update
def : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA512SU[01]")>;
// -- SHA1 choose/majority/parity
def : InstRW<[Ampere1BWrite_4cyc_1X], (instregex "^SHA1[CMP]")>;
// -- SHA1 hash/schedule update
def : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA1SU[01]")>;
def : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA1H")>;
// -- SM3 hash
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
                                                  "^SM3TT[12][AB]$")>;
// -- SM4
def : InstRW<[Ampere1BWrite_4cyc_1X], (instrs SM4E, SM4ENCKEY)>;

// FP and vector load instructions
// -- Load 1-element structure to one/all lanes
// ---- all lanes
def : InstRW<[Ampere1BWrite_6cyc_1L_1XY], (instregex "^LD1Rv(8b|4h|2s|16b|8h|4s|2d)")>;
// ---- one lane
def : InstRW<[Ampere1BWrite_6cyc_1L_1XY], (instregex "^LD1i(8|16|32|64)")>;
// -- Load 1-element structure to one/all lanes, 1D size
def : InstRW<[Ampere1BWrite_4cyc_1L], (instregex "^LD1Rv1d")>;
// -- Load 1-element structures to 1 register
def : InstRW<[Ampere1BWrite_4cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Load 1-element structures to 2 registers
def : InstRW<[Ampere1BWrite_4cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Load 1-element structures to 3 registers
def : InstRW<[Ampere1BWrite_5cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Load 1-element structures to 4 registers
def : InstRW<[Ampere1BWrite_5cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Load 2-element structure to all lanes of 2 registers, 1D size
def : InstRW<[Ampere1BWrite_4cyc_2L], (instregex "^LD2Rv1d")>;
// -- Load 2-element structure to all lanes of 2 registers, other sizes
def : InstRW<[Ampere1BWrite_6cyc_2L_2XY], (instregex "^LD2Rv(8b|4h|2s|16b|8h|4s|2d)")>;
// -- Load 2-element structure to one lane of 2 registers
def : InstRW<[Ampere1BWrite_6cyc_2L_2XY], (instregex "^LD2i(8|16|32|64)")>;
// -- Load 2-element structures to 2 registers, 16B/8H/4S/2D size
def : InstRW<[Ampere1BWrite_6cyc_2L_2XY], (instregex "^LD2Twov(16b|8h|4s|2d)")>;
// -- Load 2-element structures to 2 registers, 8B/4H/2S size
def : InstRW<[Ampere1BWrite_8cyc_2L_3XY], (instregex "^LD2Twov(8b|4h|2s)")>;
// -- Load 3-element structure to all lanes of 3 registers, 1D size
def : InstRW<[Ampere1BWrite_5cyc_3L], (instregex "^LD3Rv1d")>;
// -- Load 3-element structure to all lanes of 3 registers, other sizes
def : InstRW<[Ampere1BWrite_7cyc_3L_3XY], (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s|2d)")>;
// -- Load 3-element structure to one lane of 3 registers
def : InstRW<[Ampere1BWrite_7cyc_3L_3XY], (instregex "^LD3i(8|16|32|64)")>;
// -- Load 3-element structures to 3 registers, 16B/8H/4S sizes
def : InstRW<[Ampere1BWrite_8cyc_3L_3XY], (instregex "^LD3Threev(16b|8h|4s)")>;
// -- Load 3-element structures to 3 registers, 2D size
def : InstRW<[Ampere1BWrite_7cyc_3L_3XY], (instregex "^LD3Threev2d")>;
// -- Load 3-element structures to 3 registers, 8B/4H/2S sizes
def : InstRW<[Ampere1BWrite_9cyc_3L_3XY], (instregex "^LD3Threev(8b|4h|2s)")>;
// -- Load 4-element structure to all lanes of 4 registers, 1D size
def : InstRW<[Ampere1BWrite_5cyc_4L], (instregex "^LD4Rv1d")>;
// -- Load 4-element structure to all lanes of 4 registers, other sizes
def : InstRW<[Ampere1BWrite_7cyc_4L_4XY], (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s|2d)")>;
// -- Load 4-element structure to one lane of 4 registers
def : InstRW<[Ampere1BWrite_7cyc_4L_4XY], (instregex "^LD4i(8|16|32|64)")>;
// -- Load 4-element structures to 4 registers, 2D size
def : InstRW<[Ampere1BWrite_8cyc_4L_4XY], (instregex "^LD4Fourv2d")>;
// -- Load 4-element structures to 4 registers, 2S size
def : InstRW<[Ampere1BWrite_11cyc_4L_8XY], (instregex "^LD4Fourv2s")>;
// -- Load 4-element structures to 4 registers, other sizes
def : InstRW<[Ampere1BWrite_10cyc_4L_8XY], (instregex "^LD4Fourv(8b|4h|16b|8h|4s)")>;
// -- Load pair, Q-form
def : InstRW<[Ampere1BWrite_4cyc_2L], (instregex "LDN?PQ")>;
// -- Load pair, S/D-form
def : InstRW<[Ampere1BWrite_5cyc_1L_1BS], (instregex "LDN?P(S|D)")>;
// -- Load register
def : InstRW<[Ampere1BWrite_4cyc_1L], (instregex "LDU?R[BHSDQ]i")>;
// -- Load register, sign-extended register
def : InstRW<[Ampere1BWrite_4cyc_1L], (instregex "LDR[BHSDQ]ro(W|X)")>;

// FP and vector store instructions
// -- Store 1-element structure from one lane of 1 register
def : InstRW<[Ampere1BWrite_4cyc_1XY_1S_1Z], (instregex "^ST1i(8|16|32|64)")>;
// -- Store 1-element structures from 1 register
def : InstRW<[Ampere1BWrite_2cyc_1S_1Z], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Store 1-element structures from 2 registers
def : InstRW<[Ampere1BWrite_3cyc_2S_2Z], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Store 1-element structures from 3 registers
def : InstRW<[Ampere1BWrite_4cyc_3S_3Z], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Store 1-element structures from 4 registers
def : InstRW<[Ampere1BWrite_5cyc_4S_4Z], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Store 2-element structure from one lane of 2 registers
def : InstRW<[Ampere1BWrite_5cyc_2XY_2S_2Z], (instregex "^ST2i(8|16|32|64)")>;
// -- Store 2-element structures from 2 registers, 16B/8H/4S/2D sizes
def : InstRW<[Ampere1BWrite_5cyc_2XY_2S_2Z], (instregex "^ST2Twov(16b|8h|4s|2d)")>;
// -- Store 2-element structures from 2 registers, 8B/4H/2S sizes
def : InstRW<[Ampere1BWrite_6cyc_2XY_2S_2Z], (instregex "^ST2Twov(8b|4h|2s)")>;
// -- Store 3-element structure from one lane of 3 registers
def : InstRW<[Ampere1BWrite_6cyc_3XY_3S_3Z], (instregex "^ST3i(8|16|32|64)")>;
// -- Store 3-element structures from 3 registers
def : InstRW<[Ampere1BWrite_6cyc_3XY_3S_3Z], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Store 4-element structure from one lane of 4 registers
def : InstRW<[Ampere1BWrite_7cyc_4XY_4S_4Z], (instregex "^ST4i(8|16|32|64)")>;
// -- Store 4-element structures from 4 registers, 16B/8H/4S sizes
def : InstRW<[Ampere1BWrite_7cyc_4XY_4S_4Z], (instregex "^ST4Fourv(16b|8h|4s)")>;
// -- Store 4-element structures from 4 registers, 2D sizes
def : InstRW<[Ampere1BWrite_7cyc_4XY_4S_4Z], (instregex "^ST4Fourv2d")>;
// -- Store 4-element structures from 4 registers, 8B/4H/2S sizes
def : InstRW<[Ampere1BWrite_9cyc_6XY_4S_4Z], (instregex "^ST4Fourv(8b|4h|2s)")>;
// -- Store pair, Q-form
def : InstRW<[Ampere1BWrite_3cyc_2S_2Z], (instregex "^STN?PQ")>;
// -- Store pair, S/D-form
def : InstRW<[Ampere1BWrite_3cyc_2S_2Z], (instregex "^STN?P[SD]")>;
// -- Store register
def : InstRW<[Ampere1BWrite_2cyc_1S_1Z], (instregex "^STU?R[BHSDQ](ui|i)")>;
// -- Store register, sign-extended register offset
def : InstRW<[Ampere1BWrite_2cyc_1S_1Z], (instregex "^STR[BHSDQ]ro[XW]")>;

// FP data processing, bfloat16 format
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instrs BFCVT)>;
def : InstRW<[Ampere1BWrite_8cyc_2XY], (instrs BFCVTN, BFCVTN2)>;
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^BFDOTv", "^BF16DOT")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instrs BFMMLA)>;
def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^BFMLAL")>;

// FP data processing, scalar/vector, half precision
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(ABD|ABS)v.[fi]16")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi]16")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi]16")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)16")>;
def : InstRW<[Ampere1BWrite_3cyc_1X], (instregex "^FCMPE?H")>;
def : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1X], (instregex "^FCCMPE?H")>;
def : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1XY], (instregex "^FCSELH")>;
// Convert FP to integer (vector FCVT*), H-form
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if]16")>;
// Convert to FP (vector [SU]CVTF), H-form
// NOTE(review): this header previously read "Convert FP to integer", which does
// not match the SCVTF/UCVTF names in the pattern -- confirm the wording.
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^[SUd]CVTFv.[fi]16")>;
// Convert to FP from GPR, H-form
// NOTE(review): the `_ZPmZ_` names look like SVE predicated forms rather than
// GPR-source conversions -- verify the intended instruction set.
def : InstRW<[Ampere1BWrite_8cyc_1BS_1XY], (instregex "^[SU]CVTF_ZPmZ_[DSH]toH$")>;
// Convert to FP from GPR, fixed-point, H-form
def : InstRW<[Ampere1BWrite_11cyc_1BS_2XY], (instregex "^[SU]CVTF[SU][WX]Hri$")>;
def : InstRW<[Ampere1BWrite_9cyc_1X], (instrs FDIVHrr)>;
def : InstRW<[Ampere1BWrite_17cyc_1X], (instregex "^FDIVv.[if]16")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if]16")>;
def : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv4[if]16")>;
def : InstRW<[Ampere1BWrite_9cyc_3XY], (instregex "^F(MAX|MIN)(NM)?Vv8[if]16")>;
def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FMULX?v.[if]16")>;
def : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULX16)>;
def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[H]rrr")>;
def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FML[AS]v.[if]16")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRECPXv.[if]16")>;
def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^F(RECP|RSQRT)S16")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if]16")>;
// FP square root, H-form
def : InstRW<[Ampere1BWrite_21cyc_1X], (instrs FSQRTHr)>;
// FP square root, vector-form, F16
def : InstRW<[Ampere1BWrite_39cyc_1X], (instregex "^FSQRTv.f16")>;

// FP data processing, scalar/vector, single/double precision
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(ABD|ABS)v.[fi](32|64)")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi](32|64)")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi](32|64)")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(32|64)")>;
def : InstRW<[Ampere1BWrite_3cyc_1X], (instregex "^FCMPE?(S|D)")>;
def : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1X], (instregex "^FCCMPE?(S|D)")>;
def : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1XY], (instregex "^FCSEL(S|D)")>;
// Convert FP to integer (vector FCVT*), S/D-form
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if](32|64)")>;
// Convert to FP (vector [SU]CVTF), S/D-form
// NOTE(review): this header previously read "Convert FP to integer", which does
// not match the SCVTF/UCVTF names in the pattern -- confirm the wording.
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^[SUd]CVTFv.[fi](32|64)")>;
// Convert to FP from GPR, S/D-form
// NOTE(review): the `_ZPmZ_` names look like SVE predicated forms rather than
// GPR-source conversions -- verify the intended instruction set.
def : InstRW<[Ampere1BWrite_8cyc_1BS_1XY], (instregex "^[SU]CVTF_ZPmZ_[DSH]to[DS]$")>;
// Convert to FP from GPR, fixed-point, S/D-form
def : InstRW<[Ampere1BWrite_11cyc_1BS_2XY], (instregex "^[SU]CVTF[SU][WX][SD]ri$")>;
// FP divide: double precision, then single precision
def : InstRW<[Ampere1BWrite_19cyc_1X], (instregex "^FDIVv.[if](64)", "FDIVD")>;
def : InstRW<[Ampere1BWrite_12cyc_1X], (instregex "^FDIVv.[if](32)", "FDIVS")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if](32|64)")>;
def : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv.[if](32|64)")>;
def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FMULX?v.[if](32|64)")>;
def : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULX32, FMULX64)>;
def : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULSrr, FNMULSrr)>;
def : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULDrr, FNMULDrr)>;
def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[SD]rrr")>;
def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FML[AS]v.[if](32|64)")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRECPXv.[if](32|64)")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(RECP|RSQRT)S(32|64)")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if](32|64)")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRINT(32|64)")>;
// FP square root: double precision, then single precision
def : InstRW<[Ampere1BWrite_63cyc_1X], (instregex "^FSQRTv.f64", "^FSQRTDr")>;
def : InstRW<[Ampere1BWrite_33cyc_1X], (instregex "^FSQRTv.f32", "^FSQRTSr")>;

// FP miscellaneous instructions
def : InstRW<[Ampere1BWrite_7cyc_1XY_1Z], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[HSD]Hr")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[HSD][SD]r")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVTLv")>;
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT(N|XN)v")>;
def : InstRW<[Ampere1BWrite_7cyc_1X_1Z], (instrs FJCVTZS)>;
def : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^FMOV[HSD][WX]r")>;
def : InstRW<[Ampere1BWrite_7cyc_1BS_1XY], (instregex "^FMOVDXHighr")>;
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^FMOV[HSD][ri]")>;
def : InstRW<[Ampere1BWrite_5cyc_1X], (instregex "^FMOVXDHighr")>;
def : InstRW<[Ampere1BWrite_3cyc_1Z], (instregex "^FMOV[WX][HSD]r")>;

// Integer arithmetic and logical instructions
def : InstRW<[Ampere1BWrite_1cyc_1A], (instregex "ADC(W|X)r", "SBC(W|X)r")>;
def : InstRW<[Ampere1BWrite_Arith], (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[sx]")>;
def : InstRW<[Ampere1BWrite_1cyc_1AB], (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[ri]")>;
def : InstRW<[Ampere1BWrite_ArithFlagsetting], (instregex "(ADD|AND|BIC|SUB)S[WX]r[sx]")>;
def : InstRW<[Ampere1BWrite_1cyc_1A], (instregex "(ADD|AND|BIC|SUB)S[WX]r[ri]")>;
def : InstRW<[Ampere1BWrite_1cyc_1A], (instregex "(ADC|SBC)S[WX]r")>;
def : InstRW<[Ampere1BWrite_1cyc_1A], (instrs RMIF)>;
def : InstRW<[Ampere1BWrite_1cyc_1A], (instregex "(CCMN|CCMP)(X|W)")>;
def : InstRW<[Ampere1BWrite_1cyc_1A], (instregex "(CSEL|CSINC|CSINV|CSNEG)(X|W)")>;
def : InstRW<[Ampere1BWrite_13cyc_1BS_1X], (instrs SDIVWr, UDIVWr)>;
def : InstRW<[Ampere1BWrite_19cyc_2BS_1X], (instrs SDIVXr, UDIVXr)>;
def : InstRW<[Ampere1BWrite_3cyc_1BS], (instregex "(S|U)MULHr")>;
def : InstRW<[Ampere1BWrite_4cyc_1BS_1AB], (instregex "(S|U)?M(ADD|SUB)L?r")>;

// Integer load instructions
def : InstRW<[Ampere1BWrite_3cyc_1L], (instregex "(LDNP|LDP|LDPSW)(X|W)")>;
def : InstRW<[Ampere1BWrite_3cyc_1L], (instregex "LDR(B|D|H|Q|S)ui")>;
// Integer loads (continued): literal, unprivileged, unscaled and
// register-offset forms all use the 3-cycle single-load write.
def : InstRW<[Ampere1BWrite_3cyc_1L], (instregex "LDR(D|Q|W|X)l")>;
def : InstRW<[Ampere1BWrite_3cyc_1L], (instregex "LDTR(B|H|W|X)i")>;
def : InstRW<[Ampere1BWrite_3cyc_1L], (instregex "LDTRS(BW|BX|HW|HX|W)i")>;
def : InstRW<[Ampere1BWrite_3cyc_1L], (instregex "LDUR(BB|HH|X|W)i")>;
def : InstRW<[Ampere1BWrite_3cyc_1L], (instregex "LDURS(BW|BX|HW|HX|W)i")>;
def : InstRW<[Ampere1BWrite_3cyc_1L], (instregex "LDR(HH|SHW|SHX|W|X)ro(W|X)")>;
// Prefetches. The list used to name PRFUMi twice, which left the
// scaled-immediate form (PRFMui) without this override; it is now listed
// explicitly alongside the literal and unscaled forms.
def : InstRW<[Ampere1BWrite_1cyc_1L], (instrs PRFMl, PRFMui, PRFUMi)>;
def : InstRW<[Ampere1BWrite_1cyc_1L], (instrs PRFMroW, PRFMroX)>;

// Integer miscellaneous instructions
def : InstRW<[Ampere1BWrite_1cyc_1A], (instrs ADR, ADRP)>;
def : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "EXTR(W|X)")>;
def : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "(S|U)?BFM(W|X)")>;
def : InstRW<[Ampere1BWrite_3cyc_1BS], (instregex "^CRC32C?[BHWX]")>;
def : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "CLS(W|X)")>;
def : InstRW<[Ampere1BWrite_1cyc_1A], (instrs SETF8, SETF16)>;
def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi,
                                                MOVZWi, MOVZXi)>;
def : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "(RBIT|REV|REV16)(W|X)r", "REV32Xr")>;
def : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "(ASR|LSL|LSR|ROR)V(W|X)r")>;

// Integer store instructions
def : InstRW<[Ampere1BWrite_1cyc_2S], (instregex "STNP(X|W)i")>;
def : InstRW<[Ampere1BWrite_1cyc_2S], (instrs STPXi)>;
def : InstRW<[Ampere1BWrite_2cyc_1B_1S], (instrs STPWi)>;
def : InstRW<[Ampere1BWrite_2cyc_1B_1S_1AB], (instregex "STP(W|X)(pre|post)")>;
def : InstRW<[Ampere1BWrite_1cyc_1S], (instrs STTRBi, STTRHi, STTRWi, STTRXi)>;
// The STUR pattern was previously listed twice in this record; the redundant
// copy has been dropped (matching is unchanged).
// NOTE(review): the duplicate may have been meant to cover STR(BB|HH)ui --
// confirm against the core's optimisation guide.
def : InstRW<[Ampere1BWrite_1cyc_1S], (instregex "STUR(BB|HH|X|W)i",
                                                "STR(X|W)ui")>;
def : InstRW<[Ampere1BWrite_1cyc_2S], (instrs STRWroX, STRXroX)>;
def : InstRW<[Ampere1BWrite_1cyc_2S], (instrs STRWroW, STRXroW)>;

// Memory tagging

// Insert Random Tags
def : InstRW<[Ampere1BWrite_1cyc_1BS_1B], (instrs IRG, IRGstack)>;
// Load allocation tag
def : InstRW<[Ampere1BWrite_4cyc_1L_1B], (instrs LDG, LDGM)>;
// Store allocation tags
def : InstRW<[Ampere1BWrite_1cyc_1S], (instrs STGi, STGM, STGPreIndex, STGPostIndex)>;
// Store allocation tags and pair of registers
def : InstRW<[Ampere1BWrite_1cyc_2S], (instrs STGPi, STGPpre, STGPpost)>;
// Store allocation tags and zero data
def : InstRW<[Ampere1BWrite_1cyc_1S], (instrs STZGi, STZGM, STZGPreIndex, STZGPostIndex)>;
// Store two tags
def : InstRW<[Ampere1BWrite_1cyc_2S], (instrs ST2Gi, ST2GPreIndex, ST2GPostIndex)>;
// Store two tags and zero data
def : InstRW<[Ampere1BWrite_1cyc_2S], (instrs STZ2Gi, STZ2GPreIndex, STZ2GPostIndex)>;
// Subtract Pointer
def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs SUBP)>;
// Subtract Pointer, flagset
def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs SUBPS)>;
// Insert Tag Mask
def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs GMI)>;
// Arithmetic, immediate to logical address tag
def : InstRW<[Ampere1BWrite_1cyc_1B], (instrs ADDG, SUBG)>;

// Pointer authentication
def : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^AUT")>;
def : InstRW<[Ampere1BWrite_6cyc_1BS_1A], (instregex "BRA(A|AZ|B|BZ)", "RETA(A|B)",
                                                     "ERETA(A|B)")>;
def : InstRW<[Ampere1BWrite_6cyc_1BS_2A], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ)>;
def : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^PAC")>;
def : InstRW<[Ampere1BWrite_8cyc_1BS_1L], (instregex "^LDRA(A|B)")>;
def : InstRW<[Ampere1BWrite_1cyc_1B], (instrs XPACD, XPACI)>;

// Vector integer instructions
// -- absolute difference
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^SABAv", "^SABALv", "^SABDv", "^SABDLv",
                                                  "^UABAv", "^UABALv", "^UABDv", "^UABDLv")>;
// -- arithmetic
def : InstRW<[Ampere1BWrite_2cyc_1XY],
    (instregex "^ABSv", "^(ADD|SUB)v", "^SADDLv", "^SADDW", "SHADD",
               "SHSUB", "^SRHADD", "^URHADD", "SSUBL", "SSUBW",
               "^UADDLv", "^UADDW", "UHADD", "UHSUB", "USUBL", "USUBW")>;
// -- arithmetic, horizontal, 16B
def : InstRW<[Ampere1BWrite_8cyc_4XY], (instregex "^ADDVv16i8v", "^SADDLVv16i8v",
                                                  "^UADDLVv16i8v")>;
def : InstRW<[Ampere1BWrite_8cyc_4XY], (instregex "^[SU](MIN|MAX)Vv16i8v")>;
// NOTE(review): in the two groups below, the ADDV patterns and the MIN/MAX
// patterns split the element arrangements differently (v8i8 and v4i32 swap
// sides), and the ADDV split does not match the 4H/4S vs 8B/8H headers.
// Left as-is; confirm against the core's optimisation guide.
// -- arithmetic, horizontal, 4H/4S
def : InstRW<[Ampere1BWrite_4cyc_2XY], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v")>;
def : InstRW<[Ampere1BWrite_4cyc_2XY], (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v")>;
// -- arithmetic, horizontal, 8B/8H
def : InstRW<[Ampere1BWrite_6cyc_3XY], (instregex "^[SU]?ADDL?V(v8i16|v4i32)v")>;
def : InstRW<[Ampere1BWrite_6cyc_3XY], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v")>;
// -- arithmetic, narrowing
def : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "(ADD|SUB)HNv.*")>;
def : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "(RADD|RSUB)HNv.*")>;
// -- arithmetic, pairwise
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^ADDPv", "^SADALP", "^UADALP",
                                                  "^SADDLPv", "^UADDLPv")>;
// -- arithmetic, saturating
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^SQADD", "^SQSUB", "^SUQADD",
               "^UQADD", "^UQSUB", "^USQADD")>;
// -- bit count: CLS, CLZ, CNT
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^(CLS|CLZ|CNT)v")>;
// -- compare: CMEQ, CMGE, CMGT, CMLE, CMLT, CMHI, CMHS
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^CMEQv", "^CMGEv", "^CMGTv", "^CMLEv", "^CMLTv",
               "^CMHIv", "^CMHSv")>;
// -- compare non-zero: CMTST
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^CMTSTv")>;
// -- dot product: SDOT, SUDOT, UDOT, USDOT
def : InstRW<
    [Ampere1BWrite_3cyc_1XY],
    (instregex "^(S|SU|U|US)DOTv")>;
// -- fp reciprocal estimate: FRECPE, FRSQRTE
def : InstRW<
    [Ampere1BWrite_6cyc_1X],
    (instregex "^FRECPEv", "^FRSQRTEv")>;
// -- integer reciprocal estimate: URECPE, URSQRTE
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^URECPEv", "^URSQRTEv")>;
// -- logical: AND, BIC, EOR, ORR, ORN, NOT
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;
// -- logical, narrowing
def : InstRW<
    [Ampere1BWrite_6cyc_2XY],
    (instregex "RSHRNv",
               "SHRNv",
               "SQSHRNv",
               "SQSHRUNv",
               "UQXTNv")>;
// -- matrix multiply
def : InstRW<
    [Ampere1BWrite_3cyc_1XY],
    (instrs SMMLA, UMMLA, USMMLA)>;
// -- max/min, followed by the pairwise max/min forms
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
// -- move immediate: MOVI, MVNI
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^MOVIv", "^MVNIv")>;
// -- multiply
def : InstRW<
    [Ampere1BWrite_3cyc_1XY],
    (instregex "MULv",
               "SMULLv", "UMULLv",
               "SQDMUL(H|L)v",
               "SQRDMULHv")>;
// -- multiply accumulate
def : InstRW<
    [Ampere1BWrite_3cyc_1XY],
    (instregex "MLAv", "MLSv",
               "(S|U|SQD)(MLAL|MLSL)v",
               "SQRDML(A|S)Hv")>;
// -- negation, saturating (grouped with SQABS)
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^SQABS", "^SQNEG")>;
// -- reverse bits/bytes: RBIT, REV16, REV32, REV64
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^RBITv", "^REV16v", "^REV32v", "^REV64v")>;
// -- shift: SSHL/USHL on v16i8, v8i16, v4i32 and v2i64 only
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
// -- shift and accumulate
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "SRSRAv", "SSRAv", "URSRAv", "USRAv")>;
// -- shift, saturating (this group also lists the SQXTN/SQXTUN records)
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^SQRSHLv", "^SQRSHRNv", "^SQRSHRUNv",
               "^SQSHL", "^SQSHLU",
               "^SQXTNv", "^SQXTUNv",
               "^UQSHRNv", "UQRSHRNv",
               "^UQRSHL", "^UQSHL")>;

// Vector miscellaneous instructions
// -- duplicate element (DUP records whose name contains "lane")
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^DUPv.+lane")>;
// -- duplicate from GPR (DUP records whose name contains "gpr")
def : InstRW<
    [Ampere1BWrite_5cyc_1BS],
    (instregex "^DUPv.+gpr")>;
// -- extract narrow: XTN
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^XTNv")>;
// -- insert/extract element: EXT and the lane forms of INS
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^EXTv", "^INSv.+lane")>;
// -- move FP immediate: vector FMOV
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^FMOVv")>;
// -- move element to GPR: SMOV, UMOV
def : InstRW<
    [Ampere1BWrite_5cyc_1X],
    (instregex "(S|U)MOVv")>;
// -- move from GPR to any element: the gpr forms of INS
def : InstRW<
    [Ampere1BWrite_7cyc_1BS_1XY],
    (instregex "^INSv.+gpr")>;
// -- table lookup: one record per table size (One/Two/Three/Four), each
//    with its own write resource
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>;
def : InstRW<
    [Ampere1BWrite_4cyc_2XY],
    (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>;
def : InstRW<
    [Ampere1BWrite_6cyc_3XY],
    (instrs TBLv8i8Three, TBLv16i8Three, TBXv8i8Three, TBXv16i8Three)>;
def : InstRW<
    [Ampere1BWrite_8cyc_4XY],
    (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>;
// -- transpose (this group also lists UZP1/UZP2)
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>;
// -- zip/unzip: ZIP1, ZIP2
def : InstRW<
    [Ampere1BWrite_2cyc_1XY],
    (instregex "^ZIP1v", "^ZIP2v")>;

} // SchedModel = Ampere1BModel