1*4c2d3b02SDimitry Andric//=- AArch64SchedAmpere1B.td - Ampere-1B scheduling def -----*- tablegen -*-=//
2*4c2d3b02SDimitry Andric//
3*4c2d3b02SDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*4c2d3b02SDimitry Andric// See https://llvm.org/LICENSE.txt for license information.
5*4c2d3b02SDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*4c2d3b02SDimitry Andric//
7*4c2d3b02SDimitry Andric//===----------------------------------------------------------------------===//
8*4c2d3b02SDimitry Andric//
9*4c2d3b02SDimitry Andric// This file defines the machine model for the Ampere Computing Ampere-1B to
10*4c2d3b02SDimitry Andric// support instruction scheduling and other instruction cost heuristics.
11*4c2d3b02SDimitry Andric//
12*4c2d3b02SDimitry Andric//===----------------------------------------------------------------------===//
13*4c2d3b02SDimitry Andric
14*4c2d3b02SDimitry Andric// The Ampere-1B core is an out-of-order micro-architecture.  The front
15*4c2d3b02SDimitry Andric// end has branch prediction, with a 10-cycle recovery time from a
16*4c2d3b02SDimitry Andric// mispredicted branch.  Instructions coming out of the front end are
17*4c2d3b02SDimitry Andric// decoded into internal micro-ops (uops).
18*4c2d3b02SDimitry Andric
// Machine model record for Ampere-1B.  The numeric parameters below are the
// global knobs of the scheduler; per-instruction costs are attached separately.
def Ampere1BModel : SchedMachineModel {
  // Maximum micro-ops dispatch rate.
  let IssueWidth = 12;
  // Micro-op re-order buffer size.
  let MicroOpBufferSize = 192;
  // Instruction queue size.
  let LoopMicroOpBufferSize = 32;
  // Optimistic load latency.
  let LoadLatency = 3;
  // Branch mispredict penalty (matches the 10-cycle recovery time).
  let MispredictPenalty = 10;
  let CompleteModel = 1;

  // Features this model does not cover: the SVE, SME and PA groups.
  list<Predicate> UnsupportedFeatures =
      !listconcat(SVEUnsupported.F, SMEUnsupported.F, PAUnsupported.F);
}
31*4c2d3b02SDimitry Andric
32*4c2d3b02SDimitry Andriclet SchedModel = Ampere1BModel in {
33*4c2d3b02SDimitry Andric
34*4c2d3b02SDimitry Andric//===----------------------------------------------------------------------===//
35*4c2d3b02SDimitry Andric// Define each kind of processor resource and number available on Ampere-1B.
36*4c2d3b02SDimitry Andric
// Integer single-cycle, branch, and flags r/w (2 units).
def Ampere1BUnitA : ProcResource<2>;
// Integer single-cycle, and complex shifts (2 units).
def Ampere1BUnitB : ProcResource<2>;
// Integer multi-cycle (1 unit).
def Ampere1BUnitBS : ProcResource<1>;
// Load (2 units).
def Ampere1BUnitL : ProcResource<2>;
// Store address calculation (2 units).
def Ampere1BUnitS : ProcResource<2>;
// FP and vector operations, and flag write (1 unit).
def Ampere1BUnitX : ProcResource<1>;
// FP and vector operations, and crypto (1 unit).
def Ampere1BUnitY : ProcResource<1>;
// FP store data and FP-to-integer moves (1 unit).
def Ampere1BUnitZ : ProcResource<1>;
45*4c2d3b02SDimitry Andric
// Group covering either integer unit A or B.
def Ampere1BUnitAB
    : ProcResGroup<[Ampere1BUnitA, Ampere1BUnitB]>;
// Group covering either FP/vector unit X or Y.
def Ampere1BUnitXY
    : ProcResGroup<[Ampere1BUnitX, Ampere1BUnitY]>;
48*4c2d3b02SDimitry Andric
49*4c2d3b02SDimitry Andric//===----------------------------------------------------------------------===//
// Define customized scheduler read/write types specific to the Ampere-1B.
51*4c2d3b02SDimitry Andric
// Latency 1; 1 micro-op on unit A.
def Ampere1BWrite_1cyc_1A
    : SchedWriteRes<[Ampere1BUnitA]> {
  let NumMicroOps = 1;
  let Latency = 1;
}
56*4c2d3b02SDimitry Andric
// Latency 1; 2 micro-ops, both on unit A.
def Ampere1BWrite_1cyc_2A
    : SchedWriteRes<[Ampere1BUnitA,
                     Ampere1BUnitA]> {
  let NumMicroOps = 2;
  let Latency = 1;
}
61*4c2d3b02SDimitry Andric
// Latency 1; 1 micro-op on unit B.
def Ampere1BWrite_1cyc_1B
    : SchedWriteRes<[Ampere1BUnitB]> {
  let NumMicroOps = 1;
  let Latency = 1;
}
66*4c2d3b02SDimitry Andric
// Latency 1; 1 micro-op on unit BS.
def Ampere1BWrite_1cyc_1BS
    : SchedWriteRes<[Ampere1BUnitBS]> {
  let NumMicroOps = 1;
  let Latency = 1;
}
71*4c2d3b02SDimitry Andric
// Latency 1; 2 micro-ops: one on unit BS, one on unit B.
def Ampere1BWrite_1cyc_1BS_1B
    : SchedWriteRes<[Ampere1BUnitBS,
                     Ampere1BUnitB]> {
  let NumMicroOps = 2;
  let Latency = 1;
}
76*4c2d3b02SDimitry Andric
// Latency 1; 1 micro-op on the AB group.
def Ampere1BWrite_1cyc_1AB
    : SchedWriteRes<[Ampere1BUnitAB]> {
  let NumMicroOps = 1;
  let Latency = 1;
}
81*4c2d3b02SDimitry Andric
// Latency 1; 2 micro-ops: one on the AB group, one on unit A.
def Ampere1BWrite_1cyc_1AB_1A
    : SchedWriteRes<[Ampere1BUnitAB,
                     Ampere1BUnitA]> {
  let NumMicroOps = 2;
  let Latency = 1;
}
86*4c2d3b02SDimitry Andric
// Latency 1; 1 micro-op on unit L.
def Ampere1BWrite_1cyc_1L
    : SchedWriteRes<[Ampere1BUnitL]> {
  let NumMicroOps = 1;
  let Latency = 1;
}
91*4c2d3b02SDimitry Andric
// Latency 1; 1 micro-op on unit S.
def Ampere1BWrite_1cyc_1S
    : SchedWriteRes<[Ampere1BUnitS]> {
  let NumMicroOps = 1;
  let Latency = 1;
}
96*4c2d3b02SDimitry Andric
// Latency 1; 2 micro-ops, both on unit S.
def Ampere1BWrite_1cyc_2S
    : SchedWriteRes<[Ampere1BUnitS,
                     Ampere1BUnitS]> {
  let NumMicroOps = 2;
  let Latency = 1;
}
101*4c2d3b02SDimitry Andric
// Latency 2; 1 micro-op on unit Y.
def Ampere1BWrite_2cyc_1Y
    : SchedWriteRes<[Ampere1BUnitY]> {
  let NumMicroOps = 1;
  let Latency = 2;
}
106*4c2d3b02SDimitry Andric
// Latency 2; 2 micro-ops, both on the AB group.
def Ampere1BWrite_2cyc_2AB
    : SchedWriteRes<[Ampere1BUnitAB,
                     Ampere1BUnitAB]> {
  let NumMicroOps = 2;
  let Latency = 2;
}
111*4c2d3b02SDimitry Andric
// Latency 2; 2 micro-ops: one on unit B, one on the AB group.
def Ampere1BWrite_2cyc_1B_1AB
    : SchedWriteRes<[Ampere1BUnitB,
                     Ampere1BUnitAB]> {
  let NumMicroOps = 2;
  let Latency = 2;
}
116*4c2d3b02SDimitry Andric
// Latency 2; 2 micro-ops: one on unit B, one on unit S.
def Ampere1BWrite_2cyc_1B_1S
    : SchedWriteRes<[Ampere1BUnitB,
                     Ampere1BUnitS]> {
  let NumMicroOps = 2;
  let Latency = 2;
}
121*4c2d3b02SDimitry Andric
// Latency 2; 3 micro-ops: unit B, unit S and the AB group.
def Ampere1BWrite_2cyc_1B_1S_1AB
    : SchedWriteRes<[Ampere1BUnitB, Ampere1BUnitS, Ampere1BUnitAB]> {
  let NumMicroOps = 3;
  let Latency = 2;
}
128*4c2d3b02SDimitry Andric
// Latency 2; 3 micro-ops: one on unit S, two on unit Z.
def Ampere1BWrite_2cyc_1S_2Z
    : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitZ, Ampere1BUnitZ]> {
  let NumMicroOps = 3;
  let Latency = 2;
}
135*4c2d3b02SDimitry Andric
// Latency 2; 1 micro-op on the XY group.
def Ampere1BWrite_2cyc_1XY
    : SchedWriteRes<[Ampere1BUnitXY]> {
  let NumMicroOps = 1;
  let Latency = 2;
}
140*4c2d3b02SDimitry Andric
// Latency 2; 2 micro-ops: one on unit S, one on unit Z.
def Ampere1BWrite_2cyc_1S_1Z
    : SchedWriteRes<[Ampere1BUnitS,
                     Ampere1BUnitZ]> {
  let NumMicroOps = 2;
  let Latency = 2;
}
145*4c2d3b02SDimitry Andric
// Latency 3; 1 micro-op on unit BS.
def Ampere1BWrite_3cyc_1BS
    : SchedWriteRes<[Ampere1BUnitBS]> {
  let NumMicroOps = 1;
  let Latency = 3;
}
150*4c2d3b02SDimitry Andric
// Latency 3; 1 micro-op on unit L.
def Ampere1BWrite_3cyc_1L
    : SchedWriteRes<[Ampere1BUnitL]> {
  let NumMicroOps = 1;
  let Latency = 3;
}
155*4c2d3b02SDimitry Andric
// Latency 3; 1 micro-op on unit X.
def Ampere1BWrite_3cyc_1X
    : SchedWriteRes<[Ampere1BUnitX]> {
  let NumMicroOps = 1;
  let Latency = 3;
}
160*4c2d3b02SDimitry Andric
// Latency 3; 1 micro-op on the XY group.
def Ampere1BWrite_3cyc_1XY
    : SchedWriteRes<[Ampere1BUnitXY]> {
  let NumMicroOps = 1;
  let Latency = 3;
}
165*4c2d3b02SDimitry Andric
// Latency 3; 1 micro-op on unit Z.
def Ampere1BWrite_3cyc_1Z
    : SchedWriteRes<[Ampere1BUnitZ]> {
  let NumMicroOps = 1;
  let Latency = 3;
}
170*4c2d3b02SDimitry Andric
// Latency 3; 2 micro-ops: one on unit S, one on unit Z.
def Ampere1BWrite_3cyc_1S_1Z
    : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitZ]> {
  let NumMicroOps = 2;
  let Latency = 3;
}
176*4c2d3b02SDimitry Andric
// Latency 3; 3 micro-ops: one on unit S, two on unit Z.
def Ampere1BWrite_3cyc_1S_2Z
    : SchedWriteRes<[Ampere1BUnitS,
                     Ampere1BUnitZ,
                     Ampere1BUnitZ]> {
  let NumMicroOps = 3;
  let Latency = 3;
}
182*4c2d3b02SDimitry Andric
// Latency 3; 4 micro-ops: two on unit S, two on unit Z.
def Ampere1BWrite_3cyc_2S_2Z
    : SchedWriteRes<[Ampere1BUnitS,
                     Ampere1BUnitS,
                     Ampere1BUnitZ,
                     Ampere1BUnitZ]> {
  let NumMicroOps = 4;
  let Latency = 3;
}
188*4c2d3b02SDimitry Andric
// Latency 4; 2 micro-ops: one on unit BS, one on the AB group.
def Ampere1BWrite_4cyc_1BS_1AB
    : SchedWriteRes<[Ampere1BUnitBS,
                     Ampere1BUnitAB]> {
  let NumMicroOps = 2;
  let Latency = 4;
}
193*4c2d3b02SDimitry Andric
// Latency 4; 1 micro-op on unit L.
def Ampere1BWrite_4cyc_1L
    : SchedWriteRes<[Ampere1BUnitL]> {
  let NumMicroOps = 1;
  let Latency = 4;
}
198*4c2d3b02SDimitry Andric
// Latency 4; 2 micro-ops, both on unit L.
def Ampere1BWrite_4cyc_2L
    : SchedWriteRes<[Ampere1BUnitL,
                     Ampere1BUnitL]> {
  let NumMicroOps = 2;
  let Latency = 4;
}
203*4c2d3b02SDimitry Andric
// Latency 4; 2 micro-ops: one on unit L, one on unit B.
def Ampere1BWrite_4cyc_1L_1B
    : SchedWriteRes<[Ampere1BUnitL,
                     Ampere1BUnitB]> {
  let NumMicroOps = 2;
  let Latency = 4;
}
208*4c2d3b02SDimitry Andric
// Latency 4; 1 micro-op on unit X.
def Ampere1BWrite_4cyc_1X
    : SchedWriteRes<[Ampere1BUnitX]> {
  let NumMicroOps = 1;
  let Latency = 4;
}
213*4c2d3b02SDimitry Andric
// Latency 4; 1 micro-op on the XY group.
def Ampere1BWrite_4cyc_1XY
    : SchedWriteRes<[Ampere1BUnitXY]> {
  let NumMicroOps = 1;
  let Latency = 4;
}
218*4c2d3b02SDimitry Andric
// Latency 4; 2 micro-ops, both on the XY group.
def Ampere1BWrite_4cyc_2XY
    : SchedWriteRes<[Ampere1BUnitXY,
                     Ampere1BUnitXY]> {
  let NumMicroOps = 2;
  let Latency = 4;
}
223*4c2d3b02SDimitry Andric
// Latency 5; 1 micro-op on unit BS.
def Ampere1BWrite_5cyc_1BS
    : SchedWriteRes<[Ampere1BUnitBS]> {
  let NumMicroOps = 1;
  let Latency = 5;
}
228*4c2d3b02SDimitry Andric
// Latency 4; 3 micro-ops: the XY group, unit S and unit Z.
def Ampere1BWrite_4cyc_1XY_1S_1Z
    : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitS, Ampere1BUnitZ]> {
  let NumMicroOps = 3;
  let Latency = 4;
}
235*4c2d3b02SDimitry Andric
// Latency 4; 6 micro-ops: three on unit S, three on unit Z.
def Ampere1BWrite_4cyc_3S_3Z
    : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS, Ampere1BUnitS,
                     Ampere1BUnitZ, Ampere1BUnitZ, Ampere1BUnitZ]> {
  let NumMicroOps = 6;
  let Latency = 4;
}
242*4c2d3b02SDimitry Andric
// Latency 5; 8 micro-ops: four on unit S, four on unit Z.
def Ampere1BWrite_5cyc_4S_4Z
    : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS, Ampere1BUnitS, Ampere1BUnitS,
                     Ampere1BUnitZ, Ampere1BUnitZ, Ampere1BUnitZ, Ampere1BUnitZ]> {
  let NumMicroOps = 8;
  let Latency = 5;
}
250*4c2d3b02SDimitry Andric
// Latency 5; 2 micro-ops: one on unit L, one on unit BS.
def Ampere1BWrite_5cyc_1L_1BS
    : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitBS]> {
  let NumMicroOps = 2;
  let Latency = 5;
}
256*4c2d3b02SDimitry Andric
// Latency 5; 3 micro-ops, all on unit L.
def Ampere1BWrite_5cyc_3L
    : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, Ampere1BUnitL]> {
  let NumMicroOps = 3;
  let Latency = 5;
}
263*4c2d3b02SDimitry Andric
// Latency 5; 4 micro-ops, all on unit L.
def Ampere1BWrite_5cyc_4L
    : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                     Ampere1BUnitL, Ampere1BUnitL]> {
  let NumMicroOps = 4;
  let Latency = 5;
}
271*4c2d3b02SDimitry Andric
// Latency 5; 1 micro-op on unit X.
def Ampere1BWrite_5cyc_1X
    : SchedWriteRes<[Ampere1BUnitX]> {
  let NumMicroOps = 1;
  let Latency = 5;
}
276*4c2d3b02SDimitry Andric
// Latency 5; 6 micro-ops: two each on the XY group, unit S and unit Z.
def Ampere1BWrite_5cyc_2XY_2S_2Z
    : SchedWriteRes<[Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitS,
                     Ampere1BUnitS,
                     Ampere1BUnitZ,
                     Ampere1BUnitZ]> {
  let NumMicroOps = 6;
  let Latency = 5;
}
283*4c2d3b02SDimitry Andric
// Latency 6; 2 micro-ops: one on unit BS, one on unit A.
def Ampere1BWrite_6cyc_1BS_1A
    : SchedWriteRes<[Ampere1BUnitBS,
                     Ampere1BUnitA]> {
  let NumMicroOps = 2;
  let Latency = 6;
}
288*4c2d3b02SDimitry Andric
// Latency 6; 3 micro-ops: one on unit BS, two on unit A.
def Ampere1BWrite_6cyc_1BS_2A
    : SchedWriteRes<[Ampere1BUnitBS,
                     Ampere1BUnitA,
                     Ampere1BUnitA]> {
  let NumMicroOps = 3;
  let Latency = 6;
}
294*4c2d3b02SDimitry Andric
// Latency 6; 2 micro-ops: one on unit L, one on the XY group.
def Ampere1BWrite_6cyc_1L_1XY
    : SchedWriteRes<[Ampere1BUnitL,
                     Ampere1BUnitXY]> {
  let NumMicroOps = 2;
  let Latency = 6;
}
299*4c2d3b02SDimitry Andric
// Latency 6; 4 micro-ops: two on unit L, two on the XY group.
def Ampere1BWrite_6cyc_2L_2XY
    : SchedWriteRes<[Ampere1BUnitL,
                     Ampere1BUnitL,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY]> {
  let NumMicroOps = 4;
  let Latency = 6;
}
305*4c2d3b02SDimitry Andric
// Latency 6; 1 micro-op on unit X.
// NumMicroOps is kept equal to the number of listed resources (one X unit),
// matching the "_1X" suffix and the convention used by every other
// Ampere1BWrite_* record in this file.
def Ampere1BWrite_6cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 6;
  let NumMicroOps = 1;
}
310*4c2d3b02SDimitry Andric
// Latency 6; 2 micro-ops, both on the XY group.
def Ampere1BWrite_6cyc_2XY
    : SchedWriteRes<[Ampere1BUnitXY,
                     Ampere1BUnitXY]> {
  let NumMicroOps = 2;
  let Latency = 6;
}
315*4c2d3b02SDimitry Andric
// Latency 6; 3 micro-ops, all on the XY group.
def Ampere1BWrite_6cyc_3XY
    : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, Ampere1BUnitXY]> {
  let NumMicroOps = 3;
  let Latency = 6;
}
321*4c2d3b02SDimitry Andric
// Latency 6; 6 micro-ops: two each on the XY group, unit S and unit Z.
def Ampere1BWrite_6cyc_2XY_2S_2Z
    : SchedWriteRes<[Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitS,
                     Ampere1BUnitS,
                     Ampere1BUnitZ,
                     Ampere1BUnitZ]> {
  let NumMicroOps = 6;
  let Latency = 6;
}
328*4c2d3b02SDimitry Andric
// Latency 6; 9 micro-ops: three each on the XY group, unit S and unit Z.
def Ampere1BWrite_6cyc_3XY_3S_3Z
    : SchedWriteRes<[Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitS,
                     Ampere1BUnitS,
                     Ampere1BUnitS,
                     Ampere1BUnitZ,
                     Ampere1BUnitZ,
                     Ampere1BUnitZ]> {
  let NumMicroOps = 9;
  let Latency = 6;
}
335*4c2d3b02SDimitry Andric
// Latency 7; 2 micro-ops: one on unit BS, one on the XY group.
def Ampere1BWrite_7cyc_1BS_1XY
    : SchedWriteRes<[Ampere1BUnitBS,
                     Ampere1BUnitXY]> {
  let NumMicroOps = 2;
  let Latency = 7;
}
340*4c2d3b02SDimitry Andric
// Latency 7; 2 micro-ops: one on the XY group, one on unit Z.
def Ampere1BWrite_7cyc_1XY_1Z
    : SchedWriteRes<[Ampere1BUnitXY,
                     Ampere1BUnitZ]> {
  let NumMicroOps = 2;
  let Latency = 7;
}
345*4c2d3b02SDimitry Andric
// Latency 7; 2 micro-ops: one on unit X, one on unit Z.
def Ampere1BWrite_7cyc_1X_1Z
    : SchedWriteRes<[Ampere1BUnitX,
                     Ampere1BUnitZ]> {
  let NumMicroOps = 2;
  let Latency = 7;
}
350*4c2d3b02SDimitry Andric
// Latency 7; 6 micro-ops: three on unit L, three on the XY group.
def Ampere1BWrite_7cyc_3L_3XY
    : SchedWriteRes<[Ampere1BUnitL,
                     Ampere1BUnitL,
                     Ampere1BUnitL,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY]> {
  let NumMicroOps = 6;
  let Latency = 7;
}
357*4c2d3b02SDimitry Andric
// Latency 7; 8 micro-ops: four on unit L, four on the XY group.
def Ampere1BWrite_7cyc_4L_4XY
    : SchedWriteRes<[Ampere1BUnitL,
                     Ampere1BUnitL,
                     Ampere1BUnitL,
                     Ampere1BUnitL,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY]> {
  let NumMicroOps = 8;
  let Latency = 7;
}
365*4c2d3b02SDimitry Andric
// Latency 7; 12 micro-ops: four each on the XY group, unit S and unit Z.
def Ampere1BWrite_7cyc_4XY_4S_4Z
    : SchedWriteRes<[Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitS,
                     Ampere1BUnitS,
                     Ampere1BUnitS,
                     Ampere1BUnitS,
                     Ampere1BUnitZ,
                     Ampere1BUnitZ,
                     Ampere1BUnitZ,
                     Ampere1BUnitZ]> {
  let NumMicroOps = 12;
  let Latency = 7;
}
375*4c2d3b02SDimitry Andric
// Latency 8; 2 micro-ops: one on unit BS, one on unit L.
def Ampere1BWrite_8cyc_1BS_1L
    : SchedWriteRes<[Ampere1BUnitBS,
                     Ampere1BUnitL]> {
  let NumMicroOps = 2;
  let Latency = 8;
}
380*4c2d3b02SDimitry Andric
// Latency 8; 2 micro-ops: one on unit BS, one on the XY group.
def Ampere1BWrite_8cyc_1BS_1XY
    : SchedWriteRes<[Ampere1BUnitBS,
                     Ampere1BUnitXY]> {
  let NumMicroOps = 2;
  let Latency = 8;
}
385*4c2d3b02SDimitry Andric
// Latency 8; 5 micro-ops: two on unit L, three on the XY group.
def Ampere1BWrite_8cyc_2L_3XY
    : SchedWriteRes<[Ampere1BUnitL,
                     Ampere1BUnitL,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY]> {
  let NumMicroOps = 5;
  let Latency = 8;
}
392*4c2d3b02SDimitry Andric
// Latency 8; 6 micro-ops: three on unit L, three on the XY group.
def Ampere1BWrite_8cyc_3L_3XY
    : SchedWriteRes<[Ampere1BUnitL,
                     Ampere1BUnitL,
                     Ampere1BUnitL,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY]> {
  let NumMicroOps = 6;
  let Latency = 8;
}
399*4c2d3b02SDimitry Andric
// Latency 8; 8 micro-ops: four on unit L, four on the XY group.
def Ampere1BWrite_8cyc_4L_4XY
    : SchedWriteRes<[Ampere1BUnitL,
                     Ampere1BUnitL,
                     Ampere1BUnitL,
                     Ampere1BUnitL,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY]> {
  let NumMicroOps = 8;
  let Latency = 8;
}
407*4c2d3b02SDimitry Andric
// Latency 8; 2 micro-ops, both on the XY group.
def Ampere1BWrite_8cyc_2XY
    : SchedWriteRes<[Ampere1BUnitXY,
                     Ampere1BUnitXY]> {
  let NumMicroOps = 2;
  let Latency = 8;
}
412*4c2d3b02SDimitry Andric
// Latency 8; 4 micro-ops, all on the XY group.
def Ampere1BWrite_8cyc_4XY
    : SchedWriteRes<[Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY]> {
  let NumMicroOps = 4;
  let Latency = 8;
}
418*4c2d3b02SDimitry Andric
// Latency 9; 14 micro-ops: six on the XY group, four on unit S, four on unit Z.
def Ampere1BWrite_9cyc_6XY_4S_4Z
    : SchedWriteRes<[Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitS,
                     Ampere1BUnitS,
                     Ampere1BUnitS,
                     Ampere1BUnitS,
                     Ampere1BUnitZ,
                     Ampere1BUnitZ,
                     Ampere1BUnitZ,
                     Ampere1BUnitZ]> {
  let NumMicroOps = 14;
  let Latency = 9;
}
429*4c2d3b02SDimitry Andric
// Latency 9; 3 micro-ops: unit A, unit BS and unit X.
def Ampere1BWrite_9cyc_1A_1BS_1X
    : SchedWriteRes<[Ampere1BUnitA,
                     Ampere1BUnitBS,
                     Ampere1BUnitX]> {
  let NumMicroOps = 3;
  let Latency = 9;
}
434*4c2d3b02SDimitry Andric
// Latency 9; 3 micro-ops: unit A, unit BS and the XY group.
def Ampere1BWrite_9cyc_1A_1BS_1XY
    : SchedWriteRes<[Ampere1BUnitA,
                     Ampere1BUnitBS,
                     Ampere1BUnitXY]> {
  let NumMicroOps = 3;
  let Latency = 9;
}
439*4c2d3b02SDimitry Andric
// Latency 9; 6 micro-ops: three on unit L, three on the XY group.
def Ampere1BWrite_9cyc_3L_3XY
    : SchedWriteRes<[Ampere1BUnitL,
                     Ampere1BUnitL,
                     Ampere1BUnitL,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY]> {
  let NumMicroOps = 6;
  let Latency = 9;
}
446*4c2d3b02SDimitry Andric
// Latency 9; 1 micro-op on unit X.
def Ampere1BWrite_9cyc_1X
    : SchedWriteRes<[Ampere1BUnitX]> {
  let NumMicroOps = 1;
  let Latency = 9;
}
451*4c2d3b02SDimitry Andric
// Latency 9; 3 micro-ops, all on the XY group.
def Ampere1BWrite_9cyc_3XY
    : SchedWriteRes<[Ampere1BUnitXY,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY]> {
  let NumMicroOps = 3;
  let Latency = 9;
}
456*4c2d3b02SDimitry Andric
// Latency 10; 12 micro-ops: four on unit L, eight on the XY group.
// The resource list carries all eight XY entries so that it agrees with both
// the "_8XY" suffix and NumMicroOps (4 + 8 = 12).
def Ampere1BWrite_10cyc_4L_8XY : SchedWriteRes<[Ampere1BUnitL,  Ampere1BUnitL,
                                                Ampere1BUnitL,  Ampere1BUnitL,
                                                Ampere1BUnitXY, Ampere1BUnitXY,
                                                Ampere1BUnitXY, Ampere1BUnitXY,
                                                Ampere1BUnitXY, Ampere1BUnitXY,
                                                Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 10;
  let NumMicroOps = 12;
}
464*4c2d3b02SDimitry Andric
// Latency 11; 3 micro-ops: one on unit BS, two on the XY group.
def Ampere1BWrite_11cyc_1BS_2XY
    : SchedWriteRes<[Ampere1BUnitBS,
                     Ampere1BUnitXY,
                     Ampere1BUnitXY]> {
  let NumMicroOps = 3;
  let Latency = 11;
}
469*4c2d3b02SDimitry Andric
// Latency 11; 12 micro-ops: four on unit L, eight on the XY group.
// The resource list carries all eight XY entries so that it agrees with both
// the "_8XY" suffix and NumMicroOps (4 + 8 = 12).
def Ampere1BWrite_11cyc_4L_8XY : SchedWriteRes<[Ampere1BUnitL,  Ampere1BUnitL,
                                                Ampere1BUnitL,  Ampere1BUnitL,
                                                Ampere1BUnitXY, Ampere1BUnitXY,
                                                Ampere1BUnitXY, Ampere1BUnitXY,
                                                Ampere1BUnitXY, Ampere1BUnitXY,
                                                Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 11;
  let NumMicroOps = 12;
}
477*4c2d3b02SDimitry Andric
// Latency 12; 1 micro-op on unit X.
def Ampere1BWrite_12cyc_1X
    : SchedWriteRes<[Ampere1BUnitX]> {
  let NumMicroOps = 1;
  let Latency = 12;
}
482*4c2d3b02SDimitry Andric
// Latency 13; 2 micro-ops: one on unit BS, one on unit X.
def Ampere1BWrite_13cyc_1BS_1X
    : SchedWriteRes<[Ampere1BUnitBS,
                     Ampere1BUnitX]> {
  let NumMicroOps = 2;
  let Latency = 13;
}
487*4c2d3b02SDimitry Andric
// Latency 17; 1 micro-op on unit X.
def Ampere1BWrite_17cyc_1X
    : SchedWriteRes<[Ampere1BUnitX]> {
  let NumMicroOps = 1;
  let Latency = 17;
}
492*4c2d3b02SDimitry Andric
// Latency 19; 3 micro-ops: two on unit BS, one on unit X.
// The latency is kept in sync with the "19cyc" in the record name; the coarse
// WriteID64 (64-bit divide) mapping in this file also uses a latency of 19.
def Ampere1BWrite_19cyc_2BS_1X : SchedWriteRes<[Ampere1BUnitBS,
                                                Ampere1BUnitBS,
                                                Ampere1BUnitX]> {
  let Latency = 19;
  let NumMicroOps = 3;
}
499*4c2d3b02SDimitry Andric
// Latency 19; 1 micro-op on unit X.
def Ampere1BWrite_19cyc_1X
    : SchedWriteRes<[Ampere1BUnitX]> {
  let NumMicroOps = 1;
  let Latency = 19;
}
504*4c2d3b02SDimitry Andric
// Latency 21; 1 micro-op on unit X.
def Ampere1BWrite_21cyc_1X
    : SchedWriteRes<[Ampere1BUnitX]> {
  let NumMicroOps = 1;
  let Latency = 21;
}
509*4c2d3b02SDimitry Andric
// Latency 33; 1 micro-op on unit X.
def Ampere1BWrite_33cyc_1X
    : SchedWriteRes<[Ampere1BUnitX]> {
  let NumMicroOps = 1;
  let Latency = 33;
}
514*4c2d3b02SDimitry Andric
// Latency 39; 1 micro-op on unit X.
def Ampere1BWrite_39cyc_1X
    : SchedWriteRes<[Ampere1BUnitX]> {
  let NumMicroOps = 1;
  let Latency = 39;
}
519*4c2d3b02SDimitry Andric
// Latency 63; 1 micro-op on unit X.
def Ampere1BWrite_63cyc_1X
    : SchedWriteRes<[Ampere1BUnitX]> {
  let NumMicroOps = 1;
  let Latency = 63;
}
524*4c2d3b02SDimitry Andric
525*4c2d3b02SDimitry Andric// For basic arithmetic, we have more flexibility for short shifts (LSL shift <= 4),
526*4c2d3b02SDimitry Andric// which are a single uop, and for extended registers, which have full flexibility
527*4c2d3b02SDimitry Andric// across Unit A or B for both uops.
// Variant write for basic arithmetic.  The alternatives are listed in the
// same order as before: extended-register form, cheap-LSL form, then the
// NoSchedPred fallback.
def Ampere1BWrite_Arith : SchedWriteVariant<[
  // Extended register: two micro-ops, each on the AB group.
  SchedVar<RegExtendedPred, [Ampere1BWrite_2cyc_2AB]>,
  // Cheap LSL: a single micro-op on the AB group.
  SchedVar<IsCheapLSL, [Ampere1BWrite_1cyc_1AB]>,
  // Fallback: one micro-op on unit B plus one on the AB group.
  SchedVar<NoSchedPred, [Ampere1BWrite_2cyc_1B_1AB]>
]>;
532*4c2d3b02SDimitry Andric
// Variant write for flag-setting arithmetic.  It lists the same three
// alternatives, in the same order, as Ampere1BWrite_Arith.
def Ampere1BWrite_ArithFlagsetting : SchedWriteVariant<[
  // Extended register: two micro-ops, each on the AB group.
  SchedVar<RegExtendedPred, [Ampere1BWrite_2cyc_2AB]>,
  // Cheap LSL: a single micro-op on the AB group.
  SchedVar<IsCheapLSL, [Ampere1BWrite_1cyc_1AB]>,
  // Fallback: one micro-op on unit B plus one on the AB group.
  SchedVar<NoSchedPred, [Ampere1BWrite_2cyc_1B_1AB]>
]>;
537*4c2d3b02SDimitry Andric
538*4c2d3b02SDimitry Andric//===----------------------------------------------------------------------===//
539*4c2d3b02SDimitry Andric// Map the target-defined scheduler read/write resources and latencies for Ampere-1B.
540*4c2d3b02SDimitry Andric// This provides a coarse model, which is then specialised below.
541*4c2d3b02SDimitry Andric
// Integer ALU, shift and extract classes.  Entries written without a body
// override nothing, so they keep the WriteRes default latency and micro-op
// count; the shifted- and extended-register classes are 2 cycles / 2 micro-ops.
542*4c2d3b02SDimitry Andricdef : WriteRes<WriteImm,   [Ampere1BUnitAB]>;  // MOVN, MOVZ
543*4c2d3b02SDimitry Andricdef : WriteRes<WriteI,     [Ampere1BUnitAB]>;  // ALU
544*4c2d3b02SDimitry Andricdef : WriteRes<WriteISReg, [Ampere1BUnitB, Ampere1BUnitAB]> {
545*4c2d3b02SDimitry Andric  let Latency = 2;
546*4c2d3b02SDimitry Andric  let NumMicroOps = 2;
547*4c2d3b02SDimitry Andric}  // ALU of Shifted-Reg
548*4c2d3b02SDimitry Andricdef : WriteRes<WriteIEReg, [Ampere1BUnitAB, Ampere1BUnitAB]> {
549*4c2d3b02SDimitry Andric  let Latency = 2;
550*4c2d3b02SDimitry Andric  let NumMicroOps = 2;
551*4c2d3b02SDimitry Andric}  // ALU of Extended-Reg
552*4c2d3b02SDimitry Andricdef : WriteRes<WriteExtr,  [Ampere1BUnitB]>;  // EXTR shifts a reg pair
553*4c2d3b02SDimitry Andricdef : WriteRes<WriteIS,    [Ampere1BUnitB]>;  // Shift/Scale
// Integer divide and multiply classes.  Only Latency is overridden here;
// NumMicroOps is left at the WriteRes default even where two units are listed.
// The specific SDIV/UDIV and multiply opcodes are refined by InstRW rules in
// the "Integer arithmetic and logical instructions" section further down.
554*4c2d3b02SDimitry Andricdef : WriteRes<WriteID32,  [Ampere1BUnitBS, Ampere1BUnitX]> {
555*4c2d3b02SDimitry Andric  let Latency = 13;
556*4c2d3b02SDimitry Andric}  // 32-bit Divide
557*4c2d3b02SDimitry Andricdef : WriteRes<WriteID64,  [Ampere1BUnitBS, Ampere1BUnitX]> {
558*4c2d3b02SDimitry Andric  let Latency = 19;
559*4c2d3b02SDimitry Andric}  // 64-bit Divide
560*4c2d3b02SDimitry Andricdef : WriteRes<WriteIM32,  [Ampere1BUnitBS]> {
561*4c2d3b02SDimitry Andric  let Latency = 3;
562*4c2d3b02SDimitry Andric}  // 32-bit Multiply
563*4c2d3b02SDimitry Andricdef : WriteRes<WriteIM64,  [Ampere1BUnitBS, Ampere1BUnitAB]> {
564*4c2d3b02SDimitry Andric  let Latency = 3;
565*4c2d3b02SDimitry Andric}  // 64-bit Multiply
// Branch, load, store and address-generation classes.
566*4c2d3b02SDimitry Andricdef : WriteRes<WriteBr,    [Ampere1BUnitA]>;
567*4c2d3b02SDimitry Andricdef : WriteRes<WriteBrReg, [Ampere1BUnitA, Ampere1BUnitA]>;
568*4c2d3b02SDimitry Andricdef : WriteRes<WriteLD,    [Ampere1BUnitL]> {
569*4c2d3b02SDimitry Andric  let Latency = 3;
570*4c2d3b02SDimitry Andric}  // Load from base addr plus immediate offset
571*4c2d3b02SDimitry Andricdef : WriteRes<WriteST,    [Ampere1BUnitS]> {
572*4c2d3b02SDimitry Andric  let Latency = 1;
573*4c2d3b02SDimitry Andric}  // Store to base addr plus immediate offset
// Two Ampere1BUnitS entries are listed, but the pair is modelled as a single
// micro-op.
574*4c2d3b02SDimitry Andricdef : WriteRes<WriteSTP,   [Ampere1BUnitS, Ampere1BUnitS]> {
575*4c2d3b02SDimitry Andric  let Latency = 1;
576*4c2d3b02SDimitry Andric  let NumMicroOps = 1;
577*4c2d3b02SDimitry Andric}  // Store a register pair.
578*4c2d3b02SDimitry Andricdef : WriteRes<WriteAdr,   [Ampere1BUnitAB]>;
// NOTE(review): this load class lists Ampere1BUnitS, whereas WriteLD above
// uses Ampere1BUnitL -- confirm the store unit is intended here.
579*4c2d3b02SDimitry Andricdef : WriteRes<WriteLDIdx, [Ampere1BUnitAB, Ampere1BUnitS]> {
580*4c2d3b02SDimitry Andric  let Latency = 3;
581*4c2d3b02SDimitry Andric  let NumMicroOps = 1;
582*4c2d3b02SDimitry Andric}  // Load from a register index (maybe scaled).
583*4c2d3b02SDimitry Andricdef : WriteRes<WriteSTIdx, [Ampere1BUnitS, Ampere1BUnitS]> {
584*4c2d3b02SDimitry Andric  let Latency = 1;
585*4c2d3b02SDimitry Andric  let NumMicroOps = 2;
586*4c2d3b02SDimitry Andric}  // Store to a register index (maybe scaled).
// Floating-point and vector classes.  All use Ampere1BUnitXY except the
// compare (Ampere1BUnitX) and the vector load/store classes.
587*4c2d3b02SDimitry Andricdef : WriteRes<WriteF,  [Ampere1BUnitXY]> {
588*4c2d3b02SDimitry Andric  let Latency = 2;
589*4c2d3b02SDimitry Andric}  // General floating-point ops.
590*4c2d3b02SDimitry Andricdef : WriteRes<WriteFCmp,  [Ampere1BUnitX]> {
591*4c2d3b02SDimitry Andric  let Latency = 3;
592*4c2d3b02SDimitry Andric}  // Floating-point compare.
593*4c2d3b02SDimitry Andricdef : WriteRes<WriteFCvt,  [Ampere1BUnitXY]> {
594*4c2d3b02SDimitry Andric  let Latency = 3;
595*4c2d3b02SDimitry Andric}  // Float conversion.
// Empty body: nothing is overridden, so the WriteRes defaults apply.
596*4c2d3b02SDimitry Andricdef : WriteRes<WriteFCopy, [Ampere1BUnitXY]> {
597*4c2d3b02SDimitry Andric}  // Float-int register copy.
598*4c2d3b02SDimitry Andricdef : WriteRes<WriteFImm,  [Ampere1BUnitXY]> {
599*4c2d3b02SDimitry Andric  let Latency = 2;
600*4c2d3b02SDimitry Andric}  // Floating-point immediate.
601*4c2d3b02SDimitry Andricdef : WriteRes<WriteFMul,  [Ampere1BUnitXY]> {
602*4c2d3b02SDimitry Andric  let Latency = 4;
603*4c2d3b02SDimitry Andric}  // Floating-point multiply.
604*4c2d3b02SDimitry Andricdef : WriteRes<WriteFDiv,  [Ampere1BUnitXY]> {
605*4c2d3b02SDimitry Andric  let Latency = 19;
606*4c2d3b02SDimitry Andric}  // Floating-point division.
607*4c2d3b02SDimitry Andricdef : WriteRes<WriteVd,    [Ampere1BUnitXY]> {
608*4c2d3b02SDimitry Andric  let Latency = 3;
609*4c2d3b02SDimitry Andric}  // 64bit Vector D ops.
610*4c2d3b02SDimitry Andricdef : WriteRes<WriteVq,    [Ampere1BUnitXY]> {
611*4c2d3b02SDimitry Andric  let Latency = 3;
612*4c2d3b02SDimitry Andric}  // 128bit Vector Q ops.
613*4c2d3b02SDimitry Andricdef : WriteRes<WriteVLD,   [Ampere1BUnitL, Ampere1BUnitL]> {
614*4c2d3b02SDimitry Andric  let Latency = 4;
615*4c2d3b02SDimitry Andric}  // Vector loads.
616*4c2d3b02SDimitry Andricdef : WriteRes<WriteVST,   [Ampere1BUnitS, Ampere1BUnitZ]> {
617*4c2d3b02SDimitry Andric  let Latency = 2;
618*4c2d3b02SDimitry Andric}  // Vector stores.
619*4c2d3b02SDimitry Andric
// WriteAtomic is flagged Unsupported in this model and reserves no unit.
620*4c2d3b02SDimitry Andricdef : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
621*4c2d3b02SDimitry Andric
// System, barrier and hint writes reserve no unit and have latency 1.
622*4c2d3b02SDimitry Andricdef : WriteRes<WriteSys,     []> { let Latency = 1; }
623*4c2d3b02SDimitry Andricdef : WriteRes<WriteBarrier, []> { let Latency = 1; }
624*4c2d3b02SDimitry Andricdef : WriteRes<WriteHint,    []> { let Latency = 1; }
625*4c2d3b02SDimitry Andric
// WriteLDHi reserves no unit of its own; its latency (3) equals the WriteLD
// latency defined above.
626*4c2d3b02SDimitry Andricdef : WriteRes<WriteLDHi,    []> {
627*4c2d3b02SDimitry Andric  let Latency = 3;
628*4c2d3b02SDimitry Andric}  // The second register of a load-pair: LDP,LDPSW,LDNP,LDXP,LDAXP
629*4c2d3b02SDimitry Andric
630*4c2d3b02SDimitry Andric// Forwarding logic.
// Every read class below has a 0-cycle advance except ReadIMA, which is
// advanced by 1 cycle when the producing write is WriteIM32 or WriteIM64.
631*4c2d3b02SDimitry Andricdef : ReadAdvance<ReadI,       0>;
632*4c2d3b02SDimitry Andricdef : ReadAdvance<ReadISReg,   0>;
633*4c2d3b02SDimitry Andricdef : ReadAdvance<ReadIEReg,   0>;
634*4c2d3b02SDimitry Andricdef : ReadAdvance<ReadIM,      0>;
635*4c2d3b02SDimitry Andricdef : ReadAdvance<ReadIMA,     1, [WriteIM32, WriteIM64]>;
636*4c2d3b02SDimitry Andricdef : ReadAdvance<ReadID,      0>;
637*4c2d3b02SDimitry Andricdef : ReadAdvance<ReadExtrHi,  0>;
638*4c2d3b02SDimitry Andricdef : ReadAdvance<ReadST,      0>;
639*4c2d3b02SDimitry Andricdef : ReadAdvance<ReadAdrBase, 0>;
640*4c2d3b02SDimitry Andricdef : ReadAdvance<ReadVLD,     0>;
641*4c2d3b02SDimitry Andric
642*4c2d3b02SDimitry Andric//===----------------------------------------------------------------------===//
643*4c2d3b02SDimitry Andric// Specialising the scheduling model further for Ampere-1B.
644*4c2d3b02SDimitry Andric
645*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs COPY)>;
646*4c2d3b02SDimitry Andric
647*4c2d3b02SDimitry Andric// Branch instructions
648*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A], (instrs Bcc, BL, RET)>;
649*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A],
650*4c2d3b02SDimitry Andric        (instrs CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>;
651*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_2A], (instrs BLR)>;
652*4c2d3b02SDimitry Andric
653*4c2d3b02SDimitry Andric// Common Short Sequence Compression (CSSC)
654*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1AB], (instregex "^ABS[WX]")>;
655*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1BS], (instregex "^CNT[WX]")>;
656*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "^CTZ[WX]")>;
657*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1AB_1A], (instregex "^[SU](MAX|MIN)[WX]")>;
658*4c2d3b02SDimitry Andric
659*4c2d3b02SDimitry Andric// Cryptography instructions
660*4c2d3b02SDimitry Andric// -- AES encryption/decryption
661*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^AES[DE]")>;
// -- AES mix columns / inverse mix columns (AESMC, AESIMC)
662*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^AESI?MC")>;
663*4c2d3b02SDimitry Andric// -- Polynomial multiplication
// NOTE(review): "^PMULL" looks redundant next to "^PMUL" if instregex matches
// by prefix -- confirm before simplifying.
664*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^PMUL", "^PMULL")>;
665*4c2d3b02SDimitry Andric// -- SHA-256 hash
666*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1X], (instregex "^SHA256(H|H2)")>;
667*4c2d3b02SDimitry Andric// -- SHA-256 schedule update
668*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA256SU[01]")>;
669*4c2d3b02SDimitry Andric// -- SHA-3 instructions
670*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY],
671*4c2d3b02SDimitry Andric        (instregex "^BCAX", "^EOR3", "^RAX1", "^XAR")>;
672*4c2d3b02SDimitry Andric// -- SHA-512 hash
673*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1X], (instregex "^SHA512(H|H2)")>;
674*4c2d3b02SDimitry Andric// -- SHA-512 schedule update
675*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA512SU[01]")>;
676*4c2d3b02SDimitry Andric// -- SHA1 choose/majority/parity
677*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1X], (instregex "^SHA1[CMP]")>;
678*4c2d3b02SDimitry Andric// -- SHA1 hash/schedule update
679*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA1SU[01]")>;
680*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA1H")>;
681*4c2d3b02SDimitry Andric// -- SM3 hash
682*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY],
683*4c2d3b02SDimitry Andric    (instregex "^SM3PARTW[12]$", "^SM3SS1$", "^SM3TT[12][AB]$")>;
// -- SM4 (SM4E, SM4ENCKEY)
684*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1X], (instrs SM4E, SM4ENCKEY)>;
685*4c2d3b02SDimitry Andric
686*4c2d3b02SDimitry Andric// FP and vector load instructions
// Reading guide for the "all lanes" (LDnR) rules in this section: the 1D
// arrangement has its own "...Rv1d" rule with a load-only write class, so it is
// deliberately absent from the size alternation of the general
// "...Rv(8b|4h|2s|16b|8h|4s|2d)" rule for the same n.
687*4c2d3b02SDimitry Andric// -- Load 1-element structure to one/all lanes
688*4c2d3b02SDimitry Andric// ---- all lanes
689*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_1L_1XY],
690*4c2d3b02SDimitry Andric        (instregex "^LD1Rv(8b|4h|2s|16b|8h|4s|2d)")>;
691*4c2d3b02SDimitry Andric// ---- one lane
692*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_1L_1XY],
693*4c2d3b02SDimitry Andric        (instregex "^LD1i(8|16|32|64)")>;
694*4c2d3b02SDimitry Andric// -- Load 1-element structure to all lanes, 1D size
695*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1L],
696*4c2d3b02SDimitry Andric        (instregex "^LD1Rv1d")>;
697*4c2d3b02SDimitry Andric// -- Load 1-element structures to 1 register
698*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1L],
699*4c2d3b02SDimitry Andric        (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
700*4c2d3b02SDimitry Andric// -- Load 1-element structures to 2 registers
701*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_2L],
702*4c2d3b02SDimitry Andric        (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>;
703*4c2d3b02SDimitry Andric// -- Load 1-element structures to 3 registers
704*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_3L],
705*4c2d3b02SDimitry Andric        (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
706*4c2d3b02SDimitry Andric// -- Load 1-element structures to 4 registers
707*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_4L],
708*4c2d3b02SDimitry Andric        (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>;
709*4c2d3b02SDimitry Andric// -- Load 2-element structure to all lanes of 2 registers, 1D size
710*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_2L],
711*4c2d3b02SDimitry Andric        (instregex "^LD2Rv1d")>;
712*4c2d3b02SDimitry Andric// -- Load 2-element structure to all lanes of 2 registers, other sizes
713*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_2L_2XY],
714*4c2d3b02SDimitry Andric        (instregex "^LD2Rv(8b|4h|2s|16b|8h|4s|2d)")>;
715*4c2d3b02SDimitry Andric// -- Load 2-element structure to one lane of 2 registers
716*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_2L_2XY],
717*4c2d3b02SDimitry Andric        (instregex "^LD2i(8|16|32|64)")>;
718*4c2d3b02SDimitry Andric// -- Load 2-element structures to 2 registers, 16B/8H/4S/2D size
719*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_2L_2XY],
720*4c2d3b02SDimitry Andric        (instregex "^LD2Twov(16b|8h|4s|2d)")>;
721*4c2d3b02SDimitry Andric// -- Load 2-element structures to 2 registers, 8B/4H/2S size
722*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_8cyc_2L_3XY],
723*4c2d3b02SDimitry Andric        (instregex "^LD2Twov(8b|4h|2s)")>;
724*4c2d3b02SDimitry Andric// -- Load 3-element structure to all lanes of 3 registers, 1D size
725*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_3L],
726*4c2d3b02SDimitry Andric        (instregex "^LD3Rv1d")>;
727*4c2d3b02SDimitry Andric// -- Load 3-element structure to all lanes of 3 registers, other sizes
728*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_3L_3XY],
729*4c2d3b02SDimitry Andric        (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s|2d)")>;
730*4c2d3b02SDimitry Andric// -- Load 3-element structure to one lane of 3 registers
731*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_3L_3XY],
732*4c2d3b02SDimitry Andric        (instregex "^LD3i(8|16|32|64)")>;
733*4c2d3b02SDimitry Andric// -- Load 3-element structures to 3 registers, 16B/8H/4S sizes
734*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_8cyc_3L_3XY],
735*4c2d3b02SDimitry Andric        (instregex "^LD3Threev(16b|8h|4s)")>;
736*4c2d3b02SDimitry Andric// -- Load 3-element structures to 3 registers, 2D size
737*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_3L_3XY],
738*4c2d3b02SDimitry Andric        (instregex "^LD3Threev2d")>;
739*4c2d3b02SDimitry Andric// -- Load 3-element structures to 3 registers, 8B/4H/2S sizes
740*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_9cyc_3L_3XY],
741*4c2d3b02SDimitry Andric        (instregex "^LD3Threev(8b|4h|2s)")>;
742*4c2d3b02SDimitry Andric// -- Load 4-element structure to all lanes of 4 registers, 1D size
743*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_4L],
744*4c2d3b02SDimitry Andric        (instregex "^LD4Rv1d")>;
745*4c2d3b02SDimitry Andric// -- Load 4-element structure to all lanes of 4 registers, other sizes
746*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_4L_4XY],
747*4c2d3b02SDimitry Andric        (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s|2d)")>;
748*4c2d3b02SDimitry Andric// -- Load 4-element structure to one lane of 4 registers
749*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_4L_4XY],
750*4c2d3b02SDimitry Andric        (instregex "^LD4i(8|16|32|64)")>;
751*4c2d3b02SDimitry Andric// -- Load 4-element structures to 4 registers, 2D size
752*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_8cyc_4L_4XY],
753*4c2d3b02SDimitry Andric        (instregex "^LD4Fourv2d")>;
754*4c2d3b02SDimitry Andric// -- Load 4-element structures to 4 registers, 2S size
755*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_11cyc_4L_8XY],
756*4c2d3b02SDimitry Andric        (instregex "^LD4Fourv2s")>;
757*4c2d3b02SDimitry Andric// -- Load 4-element structures to 4 registers, other sizes
758*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_10cyc_4L_8XY],
759*4c2d3b02SDimitry Andric        (instregex "^LD4Fourv(8b|4h|16b|8h|4s)")>;
// NOTE(review): the four patterns below carry no leading `^`, unlike the
// structure-load rules above and the matching store rules (`^STN?PQ`, ...).
// Confirm that instregex anchors at the start of the opcode name on its own.
760*4c2d3b02SDimitry Andric// -- Load pair, Q-form
761*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_2L], (instregex "LDN?PQ")>;
762*4c2d3b02SDimitry Andric// -- Load pair, S/D-form
// NOTE(review): with no terminator after (S|D), this pattern would also match
// opcode names that begin with "LDPSW" -- confirm that is intended.
763*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_1L_1BS], (instregex "LDN?P(S|D)")>;
764*4c2d3b02SDimitry Andric// -- Load register
// NOTE(review): this pattern needs `i` directly after the register letter, so
// it cannot match a "...ui" opcode name; the "LDR(B|D|H|Q|S)ui" rule in the
// integer-load section gives those a 3-cycle class instead of the 4-cycle class
// used here -- confirm the split is intended.
765*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1L], (instregex "LDU?R[BHSDQ]i")>;
766*4c2d3b02SDimitry Andric// -- Load register, sign-extended register
767*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1L], (instregex "LDR[BHSDQ]ro(W|X)")>;
768*4c2d3b02SDimitry Andric
769*4c2d3b02SDimitry Andric// FP and vector store instructions
// Each rule pairs a write class with the opcode-name pattern it applies to.
// None of the ST1..ST4 patterns ends in `$`, so they constrain only the start
// of the opcode name.
770*4c2d3b02SDimitry Andric// -- Store 1-element structure from one lane of 1 register
771*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY_1S_1Z],
772*4c2d3b02SDimitry Andric        (instregex "^ST1i(8|16|32|64)")>;
773*4c2d3b02SDimitry Andric// -- Store 1-element structures from 1 register
774*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1S_1Z],
775*4c2d3b02SDimitry Andric        (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
776*4c2d3b02SDimitry Andric// -- Store 1-element structures from 2 registers
777*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_2S_2Z],
778*4c2d3b02SDimitry Andric        (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>;
779*4c2d3b02SDimitry Andric// -- Store 1-element structures from 3 registers
780*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_3S_3Z],
781*4c2d3b02SDimitry Andric        (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
782*4c2d3b02SDimitry Andric// -- Store 1-element structures from 4 registers
783*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_4S_4Z],
784*4c2d3b02SDimitry Andric        (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>;
785*4c2d3b02SDimitry Andric// -- Store 2-element structure from one lane of 2 registers
786*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_2XY_2S_2Z],
787*4c2d3b02SDimitry Andric        (instregex "^ST2i(8|16|32|64)")>;
788*4c2d3b02SDimitry Andric// -- Store 2-element structures from 2 registers, 16B/8H/4S/2D sizes
789*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_2XY_2S_2Z],
790*4c2d3b02SDimitry Andric        (instregex "^ST2Twov(16b|8h|4s|2d)")>;
791*4c2d3b02SDimitry Andric// -- Store 2-element structures from 2 registers, 8B/4H/2S sizes
792*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_2XY_2S_2Z],
793*4c2d3b02SDimitry Andric        (instregex "^ST2Twov(8b|4h|2s)")>;
794*4c2d3b02SDimitry Andric// -- Store 3-element structure from one lane of 3 registers
795*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_3XY_3S_3Z],
796*4c2d3b02SDimitry Andric        (instregex "^ST3i(8|16|32|64)")>;
797*4c2d3b02SDimitry Andric// -- Store 3-element structures from 3 registers
798*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_3XY_3S_3Z],
799*4c2d3b02SDimitry Andric        (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
800*4c2d3b02SDimitry Andric// -- Store 4-element structure from one lane of 4 registers
801*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_4XY_4S_4Z],
802*4c2d3b02SDimitry Andric        (instregex "^ST4i(8|16|32|64)")>;
803*4c2d3b02SDimitry Andric// -- Store 4-element structures from 4 registers, 16B/8H/4S sizes
804*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_4XY_4S_4Z],
805*4c2d3b02SDimitry Andric        (instregex "^ST4Fourv(16b|8h|4s)")>;
806*4c2d3b02SDimitry Andric// -- Store 4-element structures from 4 registers, 2D size
// (same write class as the 16B/8H/4S rule directly above)
807*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_4XY_4S_4Z],
808*4c2d3b02SDimitry Andric        (instregex "^ST4Fourv2d")>;
809*4c2d3b02SDimitry Andric// -- Store 4-element structures from 4 registers, 8B/4H/2S sizes
810*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_9cyc_6XY_4S_4Z],
811*4c2d3b02SDimitry Andric        (instregex "^ST4Fourv(8b|4h|2s)")>;
812*4c2d3b02SDimitry Andric// -- Store pair, Q-form
813*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_2S_2Z], (instregex "^STN?PQ")>;
814*4c2d3b02SDimitry Andric// -- Store pair, S/D-form
815*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_2S_2Z], (instregex "^STN?P[SD]")>;
816*4c2d3b02SDimitry Andric// -- Store register
817*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1S_1Z], (instregex "^STU?R[BHSDQ](ui|i)")>;
818*4c2d3b02SDimitry Andric// -- Store register, sign-extended register offset
819*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1S_1Z], (instregex "^STR[BHSDQ]ro[XW]")>;
820*4c2d3b02SDimitry Andric
821*4c2d3b02SDimitry Andric// FP data processing, bfloat16 format
822*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instrs BFCVT)>;
823*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_8cyc_2XY], (instrs BFCVTN, BFCVTN2)>;
824*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^BFDOTv", "^BF16DOT")>;
825*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instrs BFMMLA)>;
826*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^BFMLAL")>;
827*4c2d3b02SDimitry Andric
828*4c2d3b02SDimitry Andric// FP data processing, scalar/vector, half precision
829*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(ABD|ABS)v.[fi]16")>;
830*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY],
831*4c2d3b02SDimitry Andric        (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi]16")>;
832*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY],
833*4c2d3b02SDimitry Andric        (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi]16")>;
834*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY],
835*4c2d3b02SDimitry Andric        (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)16")>;
836*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1X],
837*4c2d3b02SDimitry Andric        (instregex "^FCMPE?H")>;
838*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1X],
839*4c2d3b02SDimitry Andric        (instregex "^FCCMPE?H")>;
840*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1XY],
841*4c2d3b02SDimitry Andric        (instregex "^FCSELH")>;
// Convert FP to integer (FCVT[AMNPZ][SU], v-named forms), H-form
842*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if]16")>;
843*4c2d3b02SDimitry Andric// Convert integer to FP ([SU]CVTF, v-named forms), H-form
844*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^[SUd]CVTFv.[fi]16")>;
845*4c2d3b02SDimitry Andric// Convert to FP from GPR, H-form
// NOTE(review): the `_ZPmZ_` names in this pattern look like SVE predicated
// opcodes rather than GPR-source SCVTF/UCVTF -- confirm the regex matches what
// the heading describes.
846*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_8cyc_1BS_1XY], (instregex "^[SU]CVTF_ZPmZ_[DSH]toH$")>;
847*4c2d3b02SDimitry Andric// Convert to FP from GPR, fixed-point, H-form
848*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_11cyc_1BS_2XY], (instregex "^[SU]CVTF[SU][WX]Hri$")>;
849*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_9cyc_1X], (instrs FDIVHrr)>;
850*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_17cyc_1X], (instregex "^FDIVv.[if]16")>;
851*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if]16")>;
852*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv4[if]16")>;
853*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_9cyc_3XY], (instregex "^F(MAX|MIN)(NM)?Vv8[if]16")>;
854*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FMULX?v.[if]16")>;
855*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULX16)>;
856*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[H]rrr")>;
857*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FML[AS]v.[if]16")>;
858*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRECPXv.[if]16")>;
// NOTE(review): the 32/64-bit F(RECP|RSQRT)S rule in the single/double section
// uses a 3-cycle class; confirm 4 cycles is intended for the 16-bit form.
859*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^F(RECP|RSQRT)S16")>;
860*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if]16")>;
861*4c2d3b02SDimitry Andric// FP square root, H-form
862*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_21cyc_1X], (instrs FSQRTHr)>;
863*4c2d3b02SDimitry Andric// FP square root, vector-form, F16
864*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_39cyc_1X], (instregex "^FSQRTv.f16")>;
865*4c2d3b02SDimitry Andric
866*4c2d3b02SDimitry Andric// FP data processing, scalar/vector, single/double precision
867*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(ABD|ABS)v.[fi](32|64)")>;
868*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY],
869*4c2d3b02SDimitry Andric        (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi](32|64)")>;
870*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY],
871*4c2d3b02SDimitry Andric        (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi](32|64)")>;
872*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY],
873*4c2d3b02SDimitry Andric        (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(32|64)")>;
874*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1X],
875*4c2d3b02SDimitry Andric        (instregex "^FCMPE?(S|D)")>;
876*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1X],
877*4c2d3b02SDimitry Andric        (instregex "^FCCMPE?(S|D)")>;
878*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1XY],
879*4c2d3b02SDimitry Andric        (instregex "^FCSEL(S|D)")>;
// Convert FP to integer (FCVT[AMNPZ][SU], v-named forms), S/D-form
880*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if](32|64)")>;
881*4c2d3b02SDimitry Andric// Convert integer to FP ([SU]CVTF, v-named forms), S/D-form
882*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^[SUd]CVTFv.[fi](32|64)")>;
883*4c2d3b02SDimitry Andric// Convert to FP from GPR, S/D-form
// NOTE(review): the `_ZPmZ_` names in this pattern look like SVE predicated
// opcodes rather than GPR-source SCVTF/UCVTF -- confirm the regex matches what
// the heading describes.
884*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_8cyc_1BS_1XY], (instregex "^[SU]CVTF_ZPmZ_[DSH]to[DS]$")>;
885*4c2d3b02SDimitry Andric// Convert to FP from GPR, fixed-point, S/D-form
886*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_11cyc_1BS_2XY], (instregex "^[SU]CVTF[SU][WX][SD]ri$")>;
// FP divide: 19-cycle class for the 64-bit patterns, 12-cycle for the 32-bit.
887*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_19cyc_1X], (instregex "^FDIVv.[if](64)", "FDIVD")>;
888*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_12cyc_1X], (instregex "^FDIVv.[if](32)", "FDIVS")>;
889*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if](32|64)")>;
890*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv.[if](32|64)")>;
891*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FMULX?v.[if](32|64)")>;
892*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULX32, FMULX64)>;
893*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULSrr, FNMULSrr)>;
894*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULDrr, FNMULDrr)>;
895*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[SD]rrr")>;
896*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FML[AS]v.[if](32|64)")>;
897*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRECPXv.[if](32|64)")>;
898*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(RECP|RSQRT)S(32|64)")>;
899*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if](32|64)")>;
900*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRINT(32|64)")>;
// FP square root: 63-cycle class for the 64-bit patterns, 33-cycle for 32-bit.
901*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_63cyc_1X], (instregex "^FSQRTv.f64", "^FSQRTDr")>;
902*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_33cyc_1X], (instregex "^FSQRTv.f32", "^FSQRTSr")>;
903*4c2d3b02SDimitry Andric
904*4c2d3b02SDimitry Andric// FP miscellaneous instructions
905*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_1XY_1Z], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>;
906*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[HSD]Hr")>;
907*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[HSD][SD]r")>;
908*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVTLv")>;
909*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT(N|XN)v")>;
910*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_1X_1Z], (instrs FJCVTZS)>;
911*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^FMOV[HSD][WX]r")>;
912*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_1BS_1XY], (instregex "^FMOVDXHighr")>;
913*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^FMOV[HSD][ri]")>;
914*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_1X], (instregex "^FMOVXDHighr")>;
915*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1Z], (instregex "^FMOV[WX][HSD]r")>;
916*4c2d3b02SDimitry Andric
917*4c2d3b02SDimitry Andric// Integer arithmetic and logical instructions
// -- Add/subtract with carry
918*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A],
919*4c2d3b02SDimitry Andric        (instregex "ADC(W|X)r", "SBC(W|X)r")>;
// -- r[sx] opcode forms: the write class is picked per instruction by the
//    Ampere1BWrite_Arith variant (extended register / cheap LSL / other).
920*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_Arith],
921*4c2d3b02SDimitry Andric        (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[sx]")>;
// -- r[ri] opcode forms: fixed single-cycle class
922*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1AB],
923*4c2d3b02SDimitry Andric        (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[ri]")>;
// -- Flag-setting (...S) counterparts of the two rules above
924*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_ArithFlagsetting],
925*4c2d3b02SDimitry Andric        (instregex "(ADD|AND|BIC|SUB)S[WX]r[sx]")>;
926*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A],
927*4c2d3b02SDimitry Andric        (instregex "(ADD|AND|BIC|SUB)S[WX]r[ri]")>;
928*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A],
929*4c2d3b02SDimitry Andric        (instregex "(ADC|SBC)S[WX]r")>;
930*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A], (instrs RMIF)>;
// -- Conditional compare and conditional select
931*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A],
932*4c2d3b02SDimitry Andric        (instregex "(CCMN|CCMP)(X|W)")>;
933*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A],
934*4c2d3b02SDimitry Andric        (instregex "(CSEL|CSINC|CSINV|CSNEG)(X|W)")>;
// -- Divide (W forms: 13-cycle class, X forms: 19-cycle class)
935*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_13cyc_1BS_1X], (instrs SDIVWr, UDIVWr)>;
936*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_19cyc_2BS_1X], (instrs SDIVXr, UDIVXr)>;
// -- Multiply high and multiply-accumulate
937*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1BS],
938*4c2d3b02SDimitry Andric        (instregex "(S|U)MULHr")>;
// NOTE(review): as written, this pattern needs `r` immediately after
// (ADD|SUB)L?.  Opcode names that carry a W/X width letter before the register
// suffix would not match and would keep the WriteIM32/WriteIM64 defaults --
// confirm that is intended.
939*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1BS_1AB],
940*4c2d3b02SDimitry Andric        (instregex "(S|U)?M(ADD|SUB)L?r")>;
941*4c2d3b02SDimitry Andric
942*4c2d3b02SDimitry Andric// Integer load instructions
// Every rule in this group assigns the same 3-cycle, single-L write class.
// NOTE(review): the LDPSW alternative below only matches if an X or W follows
// "LDPSW" directly in the opcode name -- confirm it matches anything.
943*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1L],
944*4c2d3b02SDimitry Andric        (instregex "(LDNP|LDP|LDPSW)(X|W)")>;
// NOTE(review): the register letters B/D/H/Q/S here are the ones used by the
// FP "Load register" rules ([BHSDQ]); the other integer rules in this section
// spell widths as BB/HH/W/X -- confirm this rule belongs under integer loads.
945*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1L],
946*4c2d3b02SDimitry Andric        (instregex "LDR(B|D|H|Q|S)ui")>;
947*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1L],
948*4c2d3b02SDimitry Andric        (instregex "LDR(D|Q|W|X)l")>;
949*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1L],
950*4c2d3b02SDimitry Andric        (instregex "LDTR(B|H|W|X)i")>;
951*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1L],
952*4c2d3b02SDimitry Andric        (instregex "LDTRS(BW|BX|HW|HX|W)i")>;
953*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1L],
954*4c2d3b02SDimitry Andric        (instregex "LDUR(BB|HH|X|W)i")>;
955*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1L],
956*4c2d3b02SDimitry Andric        (instregex "LDURS(BW|BX|HW|HX|W)i")>;
957*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1L],
958*4c2d3b02SDimitry Andric        (instregex "LDR(HH|SHW|SHX|W|X)ro(W|X)")>;
// -- Prefetch.  All prefetch opcodes use the 1-cycle, single-L write class.
// The first list previously named PRFUMi twice; the duplicate is replaced by
// PRFMui so that opcode no longer falls back to the generic load class.
959*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1L],
960*4c2d3b02SDimitry Andric        (instrs PRFMl, PRFMui, PRFUMi)>;
961*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1L],
962*4c2d3b02SDimitry Andric        (instrs PRFMroW, PRFMroX)>;
963*4c2d3b02SDimitry Andric
964*4c2d3b02SDimitry Andric// Integer miscellaneous instructions
// -- PC-relative address
965*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A],  (instrs ADR, ADRP)>;
// -- extract / bitfield move
966*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1B],  (instregex "EXTR(W|X)")>;
967*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1B],  (instregex "(S|U)?BFM(W|X)")>;
// -- CRC32 / CRC32C
968*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1BS], (instregex "^CRC32C?[BHWX]")>;
// -- count leading sign bits
// NOTE(review): only CLS(W|X) is listed here, not CLZ -- confirm CLZ is
// covered elsewhere in the model.
969*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1B],  (instregex "CLS(W|X)")>;
// -- flag manipulation
970*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A],  (instrs SETF8, SETF16)>;
// -- move wide immediate
971*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1AB],
972*4c2d3b02SDimitry Andric        (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;
// -- reverse bits/bytes
973*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1B],
974*4c2d3b02SDimitry Andric        (instregex "(RBIT|REV|REV16)(W|X)r", "REV32Xr")>;
// -- shift by register
975*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1B],
976*4c2d3b02SDimitry Andric        (instregex "(ASR|LSL|LSR|ROR)V(W|X)r")>;
977*4c2d3b02SDimitry Andric
978*4c2d3b02SDimitry Andric// Integer store instructions
// -- store pair
979*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_2S],        (instregex "STNP(X|W)i")>;
980*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_2S],        (instrs STPXi)>;
981*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1B_1S],     (instrs STPWi)>;
982*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1B_1S_1AB], (instregex "STP(W|X)(pre|post)")>;
// -- single-register stores
983*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1S],        (instrs STTRBi, STTRHi, STTRWi, STTRXi)>;
// The "STUR(BB|HH|X|W)i" pattern was listed twice in this instregex; the
// redundant copy is dropped, which leaves the matched set unchanged.
// NOTE(review): the duplicate may have been meant to cover STR(BB|HH)ui,
// which "STR(X|W)ui" does not match -- confirm those are handled elsewhere.
984*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1S],        (instregex "STUR(BB|HH|X|W)i",
985*4c2d3b02SDimitry Andric                                                        "STR(X|W)ui")>;
// -- register-offset stores (`roX`, then `roW`)
987*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_2S],        (instrs STRWroX, STRXroX)>;
988*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_2S],        (instrs STRWroW, STRXroW)>;
989*4c2d3b02SDimitry Andric
990*4c2d3b02SDimitry Andric// Memory tagging
// Each entry below lists its instructions by name with `instrs`.
991*4c2d3b02SDimitry Andric
992*4c2d3b02SDimitry Andric// Insert Random Tags
993*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1BS_1B], (instrs IRG, IRGstack)>;
994*4c2d3b02SDimitry Andric// Load allocation tag (LDG) and load tag multiple (LDGM)
995*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1L_1B], (instrs LDG, LDGM)>;
996*4c2d3b02SDimitry Andric// Store allocation tags (offset, multiple, pre-index and post-index forms)
997*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1S],
998*4c2d3b02SDimitry Andric    (instrs STGi, STGM, STGPreIndex, STGPostIndex)>;
999*4c2d3b02SDimitry Andric// Store allocation tags and pair of registers
1000*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_2S],
1001*4c2d3b02SDimitry Andric    (instrs STGPi, STGPpre, STGPpost)>;
1002*4c2d3b02SDimitry Andric// Store allocation tags and zero data
1003*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1S],
1004*4c2d3b02SDimitry Andric    (instrs STZGi, STZGM, STZGPreIndex, STZGPostIndex)>;
1005*4c2d3b02SDimitry Andric// Store two tags
1006*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_2S],
1007*4c2d3b02SDimitry Andric    (instrs ST2Gi, ST2GPreIndex, ST2GPostIndex)>;
1008*4c2d3b02SDimitry Andric// Store two tags and zero data
1009*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_2S],
1010*4c2d3b02SDimitry Andric    (instrs STZ2Gi, STZ2GPreIndex, STZ2GPostIndex)>;
1011*4c2d3b02SDimitry Andric// Subtract Pointer
1012*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs SUBP)>;
1013*4c2d3b02SDimitry Andric// Subtract Pointer, setting flags
1014*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs SUBPS)>;
1015*4c2d3b02SDimitry Andric// Insert Tag Mask
1016*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs GMI)>;
1017*4c2d3b02SDimitry Andric// Arithmetic, immediate to logical address tag
1018*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1B], (instrs ADDG, SUBG)>;
1019*4c2d3b02SDimitry Andric
1020*4c2d3b02SDimitry Andric// Pointer authentication
// -- authenticate (every name starting with AUT)
1021*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^AUT")>;
// -- authenticated branch / return / exception return
1022*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_1BS_1A],
1023*4c2d3b02SDimitry Andric        (instregex "BRA(A|AZ|B|BZ)", "RETA(A|B)", "ERETA(A|B)")>;
// -- authenticated branch with link
1024*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_1BS_2A],
1025*4c2d3b02SDimitry Andric        (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ)>;
// -- sign (every name starting with PAC)
1026*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^PAC")>;
// -- authenticated load
1027*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_8cyc_1BS_1L], (instregex "^LDRA(A|B)")>;
// -- strip authentication code
1028*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1B], (instrs XPACD, XPACI)>;
1029*4c2d3b02SDimitry Andric
1030*4c2d3b02SDimitry Andric// Vector integer instructions
1031*4c2d3b02SDimitry Andric// -- absolute difference (including accumulate and long forms)
1032*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY],
1033*4c2d3b02SDimitry Andric             (instregex "^SABAv", "^SABALv", "^SABDv", "^SABDLv",
1034*4c2d3b02SDimitry Andric                        "^UABAv", "^UABALv", "^UABDv", "^UABDLv")>;
1035*4c2d3b02SDimitry Andric// -- arithmetic (add/sub, long, wide, halving and rounding-halving forms)
// Some patterns here carry a leading `^` and a trailing `v` and others do
// not; they are kept exactly as written.
1036*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY],
1037*4c2d3b02SDimitry Andric        (instregex "^ABSv", "^(ADD|SUB)v", "^SADDLv", "^SADDW", "SHADD",
1038*4c2d3b02SDimitry Andric                   "SHSUB", "^SRHADD", "^URHADD", "SSUBL", "SSUBW",
1039*4c2d3b02SDimitry Andric                   "^UADDLv", "^UADDW", "UHADD", "UHSUB", "USUBL", "USUBW")>;
// Across-lanes reductions.  Each lane-count group has one entry for the
// ADDV/SADDLV/UADDLV family and one for the [SU]MINV/[SU]MAXV family.
// NOTE(review): the ADD*V lane lists do not line up with the headings or with
// the MIN/MAX lists next to them: the "4H/4S" ADD*V entry names
// v8i8|v4i16|v2i32 and the "8B/8H" ADD*V entry names v8i16|v4i32, while the
// MIN/MAX entries name v4i16|v4i32 and v8i8|v8i16 respectively.  Confirm the
// intended grouping against the Ampere-1B optimisation guide.
1040*4c2d3b02SDimitry Andric// -- arithmetic, horizontal, 16B
1041*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_8cyc_4XY],
1042*4c2d3b02SDimitry Andric            (instregex "^ADDVv16i8v", "^SADDLVv16i8v", "^UADDLVv16i8v")>;
1043*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_8cyc_4XY],
1044*4c2d3b02SDimitry Andric            (instregex "^[SU](MIN|MAX)Vv16i8v")>;
1045*4c2d3b02SDimitry Andric// -- arithmetic, horizontal, 4H/4S
1046*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_2XY],
1047*4c2d3b02SDimitry Andric            (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v")>;
1048*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_2XY],
1049*4c2d3b02SDimitry Andric            (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v")>;
1050*4c2d3b02SDimitry Andric// -- arithmetic, horizontal, 8B/8H
1051*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_3XY],
1052*4c2d3b02SDimitry Andric            (instregex "^[SU]?ADDL?V(v8i16|v4i32)v")>;
1053*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_3XY],
1054*4c2d3b02SDimitry Andric            (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v")>;
1055*4c2d3b02SDimitry Andric// -- arithmetic, narrowing (plain, then rounding forms)
1056*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "(ADD|SUB)HNv.*")>;
1057*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "(RADD|RSUB)HNv.*")>;
1058*4c2d3b02SDimitry Andric// -- arithmetic, pairwise
1059*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY],
1060*4c2d3b02SDimitry Andric        (instregex "^ADDPv", "^SADALP", "^UADALP", "^SADDLPv", "^UADDLPv")>;
1061*4c2d3b02SDimitry Andric// -- arithmetic, saturating
1062*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY],
1063*4c2d3b02SDimitry Andric        (instregex "^SQADD", "^SQSUB", "^SUQADD", "^UQADD", "^UQSUB", "^USQADD")>;
1064*4c2d3b02SDimitry Andric// -- bit count
1065*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY],
1066*4c2d3b02SDimitry Andric        (instregex "^(CLS|CLZ|CNT)v")>;
1067*4c2d3b02SDimitry Andric// -- compare
1068*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY],
1069*4c2d3b02SDimitry Andric        (instregex "^CMEQv", "^CMGEv", "^CMGTv", "^CMLEv", "^CMLTv",
1070*4c2d3b02SDimitry Andric                   "^CMHIv", "^CMHSv")>;
1071*4c2d3b02SDimitry Andric// -- compare non-zero
1072*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^CMTSTv")>;
1073*4c2d3b02SDimitry Andric// -- dot product
1074*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^(S|SU|U|US)DOTv")>;
1075*4c2d3b02SDimitry Andric// -- fp reciprocal estimate (and reciprocal square-root estimate)
1076*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_1X], (instregex "^FRECPEv", "^FRSQRTEv")>;
1077*4c2d3b02SDimitry Andric// -- integer reciprocal estimate (and reciprocal square-root estimate)
1078*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^URECPEv", "^URSQRTEv")>;
1079*4c2d3b02SDimitry Andric// -- logical
1080*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY],
1081*4c2d3b02SDimitry Andric        (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;
1082*4c2d3b02SDimitry Andric// -- logical, narrowing (the shift-right-narrow and UQXTN patterns below)
1083*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_2XY],
1084*4c2d3b02SDimitry Andric        (instregex "RSHRNv",
1085*4c2d3b02SDimitry Andric                   "SHRNv", "SQSHRNv", "SQSHRUNv",
1086*4c2d3b02SDimitry Andric                   "UQXTNv")>;
1087*4c2d3b02SDimitry Andric// -- matrix multiply
1088*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY],
1089*4c2d3b02SDimitry Andric        (instrs SMMLA, UMMLA, USMMLA)>;
1090*4c2d3b02SDimitry Andric// -- max/min (element-wise, then pairwise)
1091*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY],
1092*4c2d3b02SDimitry Andric        (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
1093*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY],
1094*4c2d3b02SDimitry Andric        (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
1095*4c2d3b02SDimitry Andric// -- move immediate
1096*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^MOVIv", "^MVNIv")>;
1097*4c2d3b02SDimitry Andric// -- multiply
1098*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY],
1099*4c2d3b02SDimitry Andric        (instregex "MULv", "SMULLv", "UMULLv", "SQDMUL(H|L)v", "SQRDMULHv")>;
1100*4c2d3b02SDimitry Andric// -- multiply accumulate
1101*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY],
1102*4c2d3b02SDimitry Andric        (instregex "MLAv", "MLSv", "(S|U|SQD)(MLAL|MLSL)v", "SQRDML(A|S)Hv")>;
1103*4c2d3b02SDimitry Andric// -- negation, saturating (and saturating absolute value)
1104*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^SQABS", "^SQNEG")>;
1105*4c2d3b02SDimitry Andric// -- reverse bits/bytes
1106*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY],
1107*4c2d3b02SDimitry Andric        (instregex "^RBITv", "^REV16v", "^REV32v", "^REV64v")>;
1108*4c2d3b02SDimitry Andric// -- shift
// NOTE(review): only the v16i8|v8i16|v4i32|v2i64 arrangements are named here
// -- confirm the remaining arrangements are covered elsewhere.
1109*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
1110*4c2d3b02SDimitry Andric// -- shift and accumulate
1111*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY],
1112*4c2d3b02SDimitry Andric        (instregex "SRSRAv", "SSRAv", "URSRAv", "USRAv")>;
1113*4c2d3b02SDimitry Andric// -- shift, saturating
// NOTE(review): SQRSHRN/SQRSHRUN/UQSHRN/UQRSHRN are bound to
// Ampere1BWrite_2cyc_1XY here, whereas the SQSHRNv/SQSHRUNv patterns in this
// file are bound to Ampere1BWrite_6cyc_2XY -- confirm the difference is
// intentional.
1114*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY],
1115*4c2d3b02SDimitry Andric        (instregex "^SQRSHLv", "^SQRSHRNv", "^SQRSHRUNv", "^SQSHL", "^SQSHLU",
1116*4c2d3b02SDimitry Andric                   "^SQXTNv", "^SQXTUNv", "^UQSHRNv", "UQRSHRNv", "^UQRSHL",
1117*4c2d3b02SDimitry Andric                   "^UQSHL")>;
1118*4c2d3b02SDimitry Andric
1119*4c2d3b02SDimitry Andric// Vector miscellaneous instructions
1120*4c2d3b02SDimitry Andric// -- duplicate element
1121*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^DUPv.+lane")>;
1122*4c2d3b02SDimitry Andric// -- duplicate from GPR
1123*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^DUPv.+gpr")>;
1124*4c2d3b02SDimitry Andric// -- extract narrow
1125*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^XTNv")>;
1126*4c2d3b02SDimitry Andric// -- extract vector (EXT) / insert element from lane
1127*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^EXTv", "^INSv.+lane")>;
1128*4c2d3b02SDimitry Andric// -- move FP immediate
1129*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^FMOVv")>;
1130*4c2d3b02SDimitry Andric// -- move element to GPR
1131*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_1X], (instregex "(S|U)MOVv")>;
1132*4c2d3b02SDimitry Andric// -- move from GPR to any element
1133*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_1BS_1XY], (instregex "^INSv.+gpr")>;
1134*4c2d3b02SDimitry Andric// -- table lookup (one entry per One/Two/Three/Four variant)
1135*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY],
1136*4c2d3b02SDimitry Andric            (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>;
1137*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_2XY],
1138*4c2d3b02SDimitry Andric            (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>;
1139*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_3XY],
1140*4c2d3b02SDimitry Andric            (instrs TBLv8i8Three, TBLv16i8Three, TBXv8i8Three, TBXv16i8Three)>;
1141*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_8cyc_4XY],
1142*4c2d3b02SDimitry Andric            (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>;
1143*4c2d3b02SDimitry Andric// -- transpose/unzip
1144*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY],
1145*4c2d3b02SDimitry Andric              (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>;
1146*4c2d3b02SDimitry Andric// -- zip
1147*4c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^ZIP1v", "^ZIP2v")>;
1148*4c2d3b02SDimitry Andric
1149*4c2d3b02SDimitry Andric} // SchedModel = Ampere1BModel
1150