1//=- AArch64SchedNeoverseV1.td - NeoverseV1 Scheduling Model -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the scheduling model for the Arm Neoverse V1 processors.
10//
11// References:
12// - "Arm Neoverse V1 Software Optimization Guide"
13// - "Arm Neoverse V1 Platform: Unleashing a new performance tier for Arm-based computing"
14//   https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/neoverse-v1-platform-a-new-performance-tier-for-arm
15// - "Neoverse V1"
16//   https://en.wikichip.org/wiki/arm_holdings/microarchitectures/neoverse_v1
17
18//
19//===----------------------------------------------------------------------===//
20
21def NeoverseV1Model : SchedMachineModel {
22  let IssueWidth            =  15; // Maximum micro-ops dispatch rate.
23  let MicroOpBufferSize     = 256; // Micro-op re-order buffer.
24  let LoadLatency           =   4; // Optimistic load latency.
25  let MispredictPenalty     =  11; // Cycles cost of branch mispredicted.
26  let LoopMicroOpBufferSize =  16; // NOTE: Copied from Cortex-A57.
27  let CompleteModel         =   1;
28
29  list<Predicate> UnsupportedFeatures = !listconcat(SVE2Unsupported.F,
30                                                    SMEUnsupported.F,
31                                                    [HasMTE]);
32}
33
34//===----------------------------------------------------------------------===//
35// Define each kind of processor resource and number available on Neoverse V1.
36// Instructions are first fetched and then decoded into internal macro-ops
37// (MOPs).  From there, the MOPs proceed through register renaming and dispatch
38// stages.  A MOP can be split into one or more micro-ops further down the
39// pipeline, after the decode stage.  Once dispatched, micro-ops wait for their
40// operands and issue out-of-order to one of the issue pipelines.  Each issue
41// pipeline can accept one micro-op per cycle.
42
43let SchedModel = NeoverseV1Model in {
44
45// Define the issue ports.
46def V1UnitB   : ProcResource<2>;  // Branch 0/1
47def V1UnitS   : ProcResource<2>;  // Integer single cycle 0/1
48def V1UnitM0  : ProcResource<1>;  // Integer multicycle 0
49def V1UnitM1  : ProcResource<1>;  // Integer multicycle 1
50def V1UnitL01 : ProcResource<2>;  // Load/Store 0/1
51def V1UnitL2  : ProcResource<1>;  // Load 2
52def V1UnitD   : ProcResource<2>;  // Store data 0/1
53def V1UnitV0  : ProcResource<1>;  // FP/ASIMD 0
54def V1UnitV1  : ProcResource<1>;  // FP/ASIMD 1
55def V1UnitV2  : ProcResource<1>;  // FP/ASIMD 2
56def V1UnitV3  : ProcResource<1>;  // FP/ASIMD 3
57
58def V1UnitI   : ProcResGroup<[V1UnitS,
59                              V1UnitM0, V1UnitM1]>;   // Integer units
60def V1UnitJ   : ProcResGroup<[V1UnitS, V1UnitM0]>;    // Integer 0-2 units
61def V1UnitM   : ProcResGroup<[V1UnitM0, V1UnitM1]>;   // Integer multicycle units
62def V1UnitL   : ProcResGroup<[V1UnitL01, V1UnitL2]>;  // Load units
63def V1UnitV   : ProcResGroup<[V1UnitV0, V1UnitV1,
64                              V1UnitV2, V1UnitV3]>;   // FP/ASIMD units
65def V1UnitV01 : ProcResGroup<[V1UnitV0, V1UnitV1]>;   // FP/ASIMD 0/1 units
66def V1UnitV02 : ProcResGroup<[V1UnitV0, V1UnitV2]>;   // FP/ASIMD 0/2 units
67def V1UnitV13 : ProcResGroup<[V1UnitV1, V1UnitV3]>;   // FP/ASIMD 1/3 units
68
69// Define commonly used read types.
70
71// No generic forwarding is provided for these types.
72def : ReadAdvance<ReadI,       0>;
73def : ReadAdvance<ReadISReg,   0>;
74def : ReadAdvance<ReadIEReg,   0>;
75def : ReadAdvance<ReadIM,      0>;
76def : ReadAdvance<ReadIMA,     0>;
77def : ReadAdvance<ReadID,      0>;
78def : ReadAdvance<ReadExtrHi,  0>;
79def : ReadAdvance<ReadAdrBase, 0>;
80def : ReadAdvance<ReadST,      0>;
81def : ReadAdvance<ReadVLD,     0>;
82
83def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
84def : WriteRes<WriteBarrier, []> { let Latency = 1; }
85def : WriteRes<WriteHint,    []> { let Latency = 1; }
86
87
88//===----------------------------------------------------------------------===//
89// Define generic 0 micro-op types
90
91let Latency = 0, NumMicroOps = 0 in
92def V1Write_0c_0Z : SchedWriteRes<[]>;
93
94
95//===----------------------------------------------------------------------===//
96// Define generic 1 micro-op types
97
98def V1Write_1c_1B      : SchedWriteRes<[V1UnitB]>   { let Latency = 1; }
99def V1Write_1c_1I      : SchedWriteRes<[V1UnitI]>   { let Latency = 1; }
100def V1Write_1c_1J      : SchedWriteRes<[V1UnitJ]>   { let Latency = 1; }
101def V1Write_4c_1L      : SchedWriteRes<[V1UnitL]>   { let Latency = 4; }
102def V1Write_6c_1L      : SchedWriteRes<[V1UnitL]>   { let Latency = 6; }
103def V1Write_1c_1L01    : SchedWriteRes<[V1UnitL01]> { let Latency = 1; }
104def V1Write_4c_1L01    : SchedWriteRes<[V1UnitL01]> { let Latency = 4; }
105def V1Write_6c_1L01    : SchedWriteRes<[V1UnitL01]> { let Latency = 6; }
106def V1Write_2c_1M      : SchedWriteRes<[V1UnitM]>   { let Latency = 2; }
107def V1Write_3c_1M      : SchedWriteRes<[V1UnitM]>   { let Latency = 3; }
108def V1Write_4c_1M      : SchedWriteRes<[V1UnitM]>   { let Latency = 4; }
109def V1Write_1c_1M0     : SchedWriteRes<[V1UnitM0]>  { let Latency = 1; }
110def V1Write_2c_1M0     : SchedWriteRes<[V1UnitM0]>  { let Latency = 2; }
111def V1Write_3c_1M0     : SchedWriteRes<[V1UnitM0]>  { let Latency = 3; }
112def V1Write_5c_1M0     : SchedWriteRes<[V1UnitM0]>  { let Latency = 5; }
113def V1Write_12c5_1M0   : SchedWriteRes<[V1UnitM0]>  { let Latency = 12;
114                                                      let ResourceCycles = [5]; }
115def V1Write_20c5_1M0   : SchedWriteRes<[V1UnitM0]>  { let Latency = 20;
116                                                      let ResourceCycles = [5]; }
117def V1Write_2c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 2; }
118def V1Write_3c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 3; }
119def V1Write_4c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 4; }
120def V1Write_5c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 5; }
121def V1Write_2c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 2; }
122def V1Write_3c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 3; }
123def V1Write_4c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 4; }
124def V1Write_6c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 6; }
125def V1Write_10c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 10;
126                                                      let ResourceCycles = [7]; }
127def V1Write_12c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 12;
128                                                      let ResourceCycles = [7]; }
129def V1Write_13c10_1V0  : SchedWriteRes<[V1UnitV0]>  { let Latency = 13;
130                                                      let ResourceCycles = [10]; }
131def V1Write_15c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 15;
132                                                      let ResourceCycles = [7]; }
133def V1Write_16c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 16;
134                                                      let ResourceCycles = [7]; }
135def V1Write_20c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 20;
136                                                      let ResourceCycles = [7]; }
137def V1Write_2c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 2; }
138def V1Write_3c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
139def V1Write_4c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
140def V1Write_5c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
141def V1Write_3c_1V02    : SchedWriteRes<[V1UnitV02]> { let Latency = 3; }
142def V1Write_4c_1V02    : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
143def V1Write_7c7_1V02   : SchedWriteRes<[V1UnitV02]> { let Latency = 7;
144                                                      let ResourceCycles = [7]; }
145def V1Write_10c7_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
146                                                      let ResourceCycles = [7]; }
147def V1Write_13c5_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
148                                                      let ResourceCycles = [5]; }
149def V1Write_13c11_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
150                                                      let ResourceCycles = [11]; }
151def V1Write_15c7_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 15;
152                                                      let ResourceCycles = [7]; }
153def V1Write_16c7_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 16;
154                                                      let ResourceCycles = [7]; }
155def V1Write_2c_1V1     : SchedWriteRes<[V1UnitV1]>  { let Latency = 2; }
156def V1Write_3c_1V1     : SchedWriteRes<[V1UnitV1]>  { let Latency = 3; }
157def V1Write_4c_1V1     : SchedWriteRes<[V1UnitV1]>  { let Latency = 4; }
158def V1Write_2c_1V13    : SchedWriteRes<[V1UnitV13]> { let Latency = 2; }
159def V1Write_4c_1V13    : SchedWriteRes<[V1UnitV13]> { let Latency = 4; }
160
161//===----------------------------------------------------------------------===//
162// Define generic 2 micro-op types
163
164let Latency = 1, NumMicroOps = 2 in
165def V1Write_1c_1B_1S     : SchedWriteRes<[V1UnitB, V1UnitS]>;
166let Latency = 6, NumMicroOps = 2 in
167def V1Write_6c_1B_1M0    : SchedWriteRes<[V1UnitB, V1UnitM0]>;
168let Latency = 3, NumMicroOps = 2 in
169def V1Write_3c_1I_1M     : SchedWriteRes<[V1UnitI, V1UnitM]>;
170let Latency = 5, NumMicroOps = 2 in
171def V1Write_5c_1I_1L     : SchedWriteRes<[V1UnitI, V1UnitL]>;
172let Latency = 7, NumMicroOps = 2 in
173def V1Write_7c_1I_1L     : SchedWriteRes<[V1UnitI, V1UnitL]>;
174let Latency = 6, NumMicroOps = 2 in
175def V1Write_6c_2L        : SchedWriteRes<[V1UnitL, V1UnitL]>;
176let Latency = 6, NumMicroOps = 2 in
177def V1Write_6c_1L_1M     : SchedWriteRes<[V1UnitL, V1UnitM]>;
178let Latency = 8, NumMicroOps = 2 in
179def V1Write_8c_1L_1V     : SchedWriteRes<[V1UnitL, V1UnitV]>;
180let Latency = 9, NumMicroOps = 2 in
181def V1Write_9c_1L_1V     : SchedWriteRes<[V1UnitL, V1UnitV]>;
182let Latency = 11, NumMicroOps = 2 in
183def V1Write_11c_1L_1V     : SchedWriteRes<[V1UnitL, V1UnitV]>;
184let Latency = 1, NumMicroOps = 2 in
185def V1Write_1c_1L01_1D   : SchedWriteRes<[V1UnitL01, V1UnitD]>;
186let Latency = 6, NumMicroOps = 2 in
187def V1Write_6c_1L01_1S   : SchedWriteRes<[V1UnitL01, V1UnitS]>;
188let Latency = 7, NumMicroOps = 2 in
189def V1Write_7c_1L01_1S   : SchedWriteRes<[V1UnitL01, V1UnitS]>;
190let Latency = 2, NumMicroOps = 2 in
191def V1Write_2c_1L01_1V   : SchedWriteRes<[V1UnitL01, V1UnitV]>;
192let Latency = 4, NumMicroOps = 2 in
193def V1Write_4c_1L01_1V   : SchedWriteRes<[V1UnitL01, V1UnitV]>;
194let Latency = 6, NumMicroOps = 2 in
195def V1Write_6c_1L01_1V   : SchedWriteRes<[V1UnitL01, V1UnitV]>;
196let Latency = 2, NumMicroOps = 2 in
197def V1Write_2c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>;
198let Latency = 4, NumMicroOps = 2 in
199def V1Write_4c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>;
200let Latency = 2, NumMicroOps = 2 in
201def V1Write_2c_2M0       : SchedWriteRes<[V1UnitM0, V1UnitM0]>;
202let Latency = 3, NumMicroOps = 2 in
203def V1Write_3c_2M0       : SchedWriteRes<[V1UnitM0, V1UnitM0]>;
204let Latency = 9, NumMicroOps = 2 in
205def V1Write_9c_1M0_1L    : SchedWriteRes<[V1UnitM0, V1UnitL]>;
206let Latency = 5, NumMicroOps = 2 in
207def V1Write_5c_1M0_1V    : SchedWriteRes<[V1UnitM0, V1UnitV]>;
208let Latency = 4, NumMicroOps = 2 in
209def V1Write_4c_1M0_1V0    : SchedWriteRes<[V1UnitM0, V1UnitV0]>;
210let Latency = 7, NumMicroOps = 2 in
211def V1Write_7c_1M0_1V0   : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
212let Latency = 5, NumMicroOps = 2 in
213def V1Write_5c_1M0_1V01    : SchedWriteRes<[V1UnitM0, V1UnitV01]>;
214let Latency = 6, NumMicroOps = 2 in
215def V1Write_6c_1M0_1V1   : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
216let Latency = 9, NumMicroOps = 2 in
217def V1Write_9c_1M0_1V1    : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
218let Latency = 4, NumMicroOps = 2 in
219def V1Write_4c_2V        : SchedWriteRes<[V1UnitV, V1UnitV]>;
220let Latency = 8, NumMicroOps = 2 in
221def V1Write_8c_1V_1V01   : SchedWriteRes<[V1UnitV, V1UnitV01]>;
222let Latency = 4, NumMicroOps = 2 in
223def V1Write_4c_2V0       : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
224let Latency = 5, NumMicroOps = 2 in
225def V1Write_5c_2V0       : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
226let Latency = 2, NumMicroOps = 2 in
227def V1Write_2c_2V01      : SchedWriteRes<[V1UnitV01, V1UnitV01]>;
228let Latency = 4, NumMicroOps = 2 in
229def V1Write_4c_2V01      : SchedWriteRes<[V1UnitV01, V1UnitV01]>;
230let Latency = 4, NumMicroOps = 2 in
231def V1Write_4c_2V02      : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
232let Latency = 6, NumMicroOps = 2 in
233def V1Write_6c_2V02      : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
234let Latency = 4, NumMicroOps = 2 in
235def V1Write_4c_1V13_1V   : SchedWriteRes<[V1UnitV13, V1UnitV]>;
236let Latency = 4, NumMicroOps = 2 in
237def V1Write_4c_2V13      : SchedWriteRes<[V1UnitV13, V1UnitV13]>;
238
239//===----------------------------------------------------------------------===//
240// Define generic 3 micro-op types
241
242let Latency = 2, NumMicroOps = 3 in
243def V1Write_2c_1I_1L01_1V01 : SchedWriteRes<[V1UnitI, V1UnitL01, V1UnitV01]>;
244let Latency = 7, NumMicroOps = 3 in
245def V1Write_7c_2M0_1V01     : SchedWriteRes<[V1UnitM0, V1UnitM0, V1UnitV01]>;
246let Latency = 8, NumMicroOps = 3 in
247def V1Write_8c_1L_2V        : SchedWriteRes<[V1UnitL, V1UnitV, V1UnitV]>;
248let Latency = 6, NumMicroOps = 3 in
249def V1Write_6c_3L           : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL]>;
250let Latency = 2, NumMicroOps = 3 in
251def V1Write_2c_1L01_1S_1V   : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>;
252let Latency = 4, NumMicroOps = 3 in
253def V1Write_4c_1L01_1S_1V   : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>;
254let Latency = 2, NumMicroOps = 3 in
255def V1Write_2c_2L01_1V01    : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitV01]>;
256let Latency = 6, NumMicroOps = 3 in
257def V1Write_6c_3V           : SchedWriteRes<[V1UnitV, V1UnitV, V1UnitV]>;
258let Latency = 4, NumMicroOps = 3 in
259def V1Write_4c_3V01         : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
260let Latency = 6, NumMicroOps = 3 in
261def V1Write_6c_3V01         : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
262let Latency = 8, NumMicroOps = 3 in
263def V1Write_8c_3V01         : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
264
265//===----------------------------------------------------------------------===//
266// Define generic 4 micro-op types
267
268let Latency = 8, NumMicroOps = 4 in
269def V1Write_8c_2M0_2V0   : SchedWriteRes<[V1UnitM0, V1UnitM0,
270                                          V1UnitV0, V1UnitV0]>;
271let Latency = 7, NumMicroOps = 4 in
272def V1Write_7c_4L        : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, V1UnitL]>;
273let Latency = 8, NumMicroOps = 4 in
274def V1Write_8c_2L_2V        : SchedWriteRes<[V1UnitL, V1UnitL,
275                                             V1UnitV, V1UnitV]>;
276let Latency = 9, NumMicroOps = 4 in
277def V1Write_9c_2L_2V        : SchedWriteRes<[V1UnitL, V1UnitL,
278                                             V1UnitV, V1UnitV]>;
279let Latency = 11, NumMicroOps = 4 in
280def V1Write_11c_2L_2V       : SchedWriteRes<[V1UnitL, V1UnitL,
281                                             V1UnitV, V1UnitV]>;
282let Latency = 10, NumMicroOps = 4 in
283def V1Write_10c_2L01_2V     : SchedWriteRes<[V1UnitL01, V1UnitL01,
284                                             V1UnitV, V1UnitV]>;
285let Latency = 2, NumMicroOps = 4 in
286def V1Write_2c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
287                                             V1UnitV01, V1UnitV01]>;
288let Latency = 4, NumMicroOps = 4 in
289def V1Write_4c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
290                                             V1UnitV01, V1UnitV01]>;
291let Latency = 8, NumMicroOps = 4 in
292def V1Write_8c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
293                                             V1UnitV01, V1UnitV01]>;
294let Latency = 9, NumMicroOps = 4 in
295def V1Write_9c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
296                                             V1UnitV01, V1UnitV01]>;
297let Latency = 10, NumMicroOps = 4 in
298def V1Write_10c_2L01_2V01   : SchedWriteRes<[V1UnitL01, V1UnitL01,
299                                             V1UnitV01, V1UnitV01]>;
300let Latency = 10, NumMicroOps = 4 in
301def V1Write_10c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01,
302                                             V1UnitV1, V1UnitV1]>;
303let Latency = 12, NumMicroOps = 4 in
304def V1Write_12c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01,
305                                             V1UnitV1, V1UnitV1]>;
306let Latency = 6, NumMicroOps = 4 in
307def V1Write_6c_4V0          : SchedWriteRes<[V1UnitV0, V1UnitV0,
308                                             V1UnitV0, V1UnitV0]>;
309let Latency = 12, NumMicroOps = 4 in
310def V1Write_12c_4V01        : SchedWriteRes<[V1UnitV01, V1UnitV01,
311                                             V1UnitV01, V1UnitV01]>;
312let Latency = 6, NumMicroOps = 4 in
313def V1Write_6c_4V02         : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
314
315//===----------------------------------------------------------------------===//
316// Define generic 5 micro-op types
317
318let Latency = 8, NumMicroOps = 5 in
319def V1Write_8c_2L_3V            : SchedWriteRes<[V1UnitL, V1UnitL,
320                                                 V1UnitV, V1UnitV, V1UnitV]>;
321let Latency = 14, NumMicroOps = 5 in
322def V1Write_14c_1V_1V0_2V1_1V13 : SchedWriteRes<[V1UnitV,
323                                                 V1UnitV0,
324                                                 V1UnitV1, V1UnitV1,
325                                                 V1UnitV13]>;
326let Latency = 9, NumMicroOps = 5 in
327def V1Write_9c_1V_4V01          : SchedWriteRes<[V1UnitV,
328                                                 V1UnitV01, V1UnitV01,
329                                                 V1UnitV01, V1UnitV01]>;
330let Latency = 6, NumMicroOps = 5 in
331def V1Write_6c_5V01             : SchedWriteRes<[V1UnitV01, V1UnitV01,
332                                                 V1UnitV01, V1UnitV01, V1UnitV01]>;
333
334//===----------------------------------------------------------------------===//
335// Define generic 6 micro-op types
336
337let Latency = 6, NumMicroOps = 6 in
338def V1Write_6c_3L_3V      : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
339                                           V1UnitV, V1UnitV, V1UnitV]>;
340let Latency = 8, NumMicroOps = 6 in
341def V1Write_8c_3L_3V      : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
342                                           V1UnitV, V1UnitV, V1UnitV]>;
343let Latency = 2, NumMicroOps = 6 in
344def V1Write_2c_3L01_3V01  : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
345                                           V1UnitV01, V1UnitV01, V1UnitV01]>;
346let Latency = 5, NumMicroOps = 6 in
347def V1Write_5c_3L01_3V01  : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
348                                           V1UnitV01, V1UnitV01, V1UnitV01]>;
349let Latency = 6, NumMicroOps = 6 in
350def V1Write_6c_3L01_3V01  : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
351                                           V1UnitV01, V1UnitV01, V1UnitV01]>;
352let Latency = 11, NumMicroOps = 6 in
353def V1Write_11c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
354                                           V1UnitV01, V1UnitV01, V1UnitV01]>;
355let Latency = 11, NumMicroOps = 6 in
356def V1Write_11c_1V_5V01   : SchedWriteRes<[V1UnitV,
357                                           V1UnitV01, V1UnitV01,
358                                           V1UnitV01, V1UnitV01, V1UnitV01]>;
359let Latency = 13, NumMicroOps = 6 in
360def V1Write_13c_6V01      : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01,
361                                           V1UnitV01, V1UnitV01, V1UnitV01]>;
362
363//===----------------------------------------------------------------------===//
364// Define generic 7 micro-op types
365
366let Latency = 8, NumMicroOps = 7 in
367def V1Write_8c_3L_4V         : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
368                                              V1UnitV, V1UnitV, V1UnitV, V1UnitV]>;
369let Latency = 8, NumMicroOps = 7 in
370def V1Write_13c_3L01_1S_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
371                                              V1UnitS,
372                                              V1UnitV01, V1UnitV01, V1UnitV01]>;
373
374//===----------------------------------------------------------------------===//
375// Define generic 8 micro-op types
376
377let Latency = 9, NumMicroOps = 8 in
378def V1Write_9c_4L_4V      : SchedWriteRes<[V1UnitL, V1UnitL,
379                                           V1UnitL, V1UnitL,
380                                           V1UnitV, V1UnitV,
381                                           V1UnitV, V1UnitV]>;
382let Latency = 2, NumMicroOps = 8 in
383def V1Write_2c_4L01_4V01  : SchedWriteRes<[V1UnitL01, V1UnitL01,
384                                           V1UnitL01, V1UnitL01,
385                                           V1UnitV01, V1UnitV01,
386                                           V1UnitV01, V1UnitV01]>;
387let Latency = 4, NumMicroOps = 8 in
388def V1Write_4c_4L01_4V01  : SchedWriteRes<[V1UnitL01, V1UnitL01,
389                                           V1UnitL01, V1UnitL01,
390                                           V1UnitV01, V1UnitV01,
391                                           V1UnitV01, V1UnitV01]>;
392let Latency = 12, NumMicroOps = 8 in
393def V1Write_12c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
394                                           V1UnitL01, V1UnitL01,
395                                           V1UnitV01, V1UnitV01,
396                                           V1UnitV01, V1UnitV01]>;
397
398//===----------------------------------------------------------------------===//
399// Define generic 10 micro-op types
400
401let Latency = 13, NumMicroOps = 10 in
402def V1Write_13c_4L01_2S_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
403                                              V1UnitL01, V1UnitL01,
404                                              V1UnitS, V1UnitS,
405                                              V1UnitV01, V1UnitV01,
406                                              V1UnitV01, V1UnitV01]>;
407let Latency = 7, NumMicroOps = 10 in
408def V1Write_7c_5L01_5V       : SchedWriteRes<[V1UnitL01, V1UnitL01,
409                                              V1UnitL01, V1UnitL01, V1UnitL01,
410                                              V1UnitV, V1UnitV,
411                                              V1UnitV, V1UnitV, V1UnitV]>;
412let Latency = 11, NumMicroOps = 10 in
413def V1Write_11c_10V0         : SchedWriteRes<[V1UnitV0,
414                                              V1UnitV0, V1UnitV0, V1UnitV0,
415                                              V1UnitV0, V1UnitV0, V1UnitV0,
416                                              V1UnitV0, V1UnitV0, V1UnitV0]>;
417
418//===----------------------------------------------------------------------===//
419// Define generic 12 micro-op types
420
421let Latency = 7, NumMicroOps = 12 in
422def V1Write_7c_6L01_6V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
423                                          V1UnitL01, V1UnitL01, V1UnitL01,
424                                          V1UnitV01, V1UnitV01, V1UnitV01,
425                                          V1UnitV01, V1UnitV01, V1UnitV01]>;
426
427//===----------------------------------------------------------------------===//
428// Define generic 15 micro-op types
429
430let Latency = 7, NumMicroOps = 15 in
431def V1Write_7c_5L01_5S_5V : SchedWriteRes<[V1UnitL01, V1UnitL01,
432                                           V1UnitL01, V1UnitL01, V1UnitL01,
433                                           V1UnitS, V1UnitS,
434                                           V1UnitS, V1UnitS, V1UnitS,
435                                           V1UnitV, V1UnitV,
436                                           V1UnitV, V1UnitV, V1UnitV]>;
437
438
439//===----------------------------------------------------------------------===//
440// Define generic 18 micro-op types
441
442let Latency = 19, NumMicroOps = 18 in
443def V1Write_11c_9L01_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
444                                         V1UnitL01, V1UnitL01, V1UnitL01,
445                                         V1UnitL01, V1UnitL01, V1UnitL01,
446                                         V1UnitV, V1UnitV, V1UnitV,
447                                         V1UnitV, V1UnitV, V1UnitV,
448                                         V1UnitV, V1UnitV, V1UnitV]>;
449let Latency = 19, NumMicroOps = 18 in
450def V1Write_19c_18V0    : SchedWriteRes<[V1UnitV0, V1UnitV0, V1UnitV0,
451                                         V1UnitV0, V1UnitV0, V1UnitV0,
452                                         V1UnitV0, V1UnitV0, V1UnitV0,
453                                         V1UnitV0, V1UnitV0, V1UnitV0,
454                                         V1UnitV0, V1UnitV0, V1UnitV0,
455                                         V1UnitV0, V1UnitV0, V1UnitV0]>;
456
457//===----------------------------------------------------------------------===//
458// Define generic 27 micro-op types
459
460let Latency = 11, NumMicroOps = 27 in
461def V1Write_11c_9L01_9S_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
462                                            V1UnitL01, V1UnitL01, V1UnitL01,
463                                            V1UnitL01, V1UnitL01, V1UnitL01,
464                                            V1UnitS, V1UnitS, V1UnitS,
465                                            V1UnitS, V1UnitS, V1UnitS,
466                                            V1UnitS, V1UnitS, V1UnitS,
467                                            V1UnitV, V1UnitV, V1UnitV,
468                                            V1UnitV, V1UnitV, V1UnitV,
469                                            V1UnitV, V1UnitV, V1UnitV]>;
470
471
472// Miscellaneous Instructions
473// -----------------------------------------------------------------------------
474
475// COPY
476def : InstRW<[V1Write_1c_1I], (instrs COPY)>;
477
478// MSR
479def : WriteRes<WriteSys, []> { let Latency = 1; }
480
481
482// Branch Instructions
483// -----------------------------------------------------------------------------
484
485// Branch, immed
486// Compare and branch
487def : SchedAlias<WriteBr, V1Write_1c_1B>;
488
489// Branch, register
490def : SchedAlias<WriteBrReg, V1Write_1c_1B>;
491
492// Branch and link, immed
493// Branch and link, register
494def : InstRW<[V1Write_1c_1B_1S], (instrs BL, BLR)>;
495
496// Compare and branch
497def : InstRW<[V1Write_1c_1B], (instregex "^[CT]BN?Z[XW]$")>;
498
499
500// Arithmetic and Logical Instructions
501// -----------------------------------------------------------------------------
502
503// ALU, basic
504// Conditional compare
505// Conditional select
506// Logical, basic
507// Address generation
508// Count leading
509// Reverse bits/bytes
510// Move immediate
511def : SchedAlias<WriteI, V1Write_1c_1I>;
512
513// ALU, basic, flagset
514def : InstRW<[V1Write_1c_1J],
515             (instregex "^(ADD|SUB)S[WX]r[ir]$",
516                        "^(ADC|SBC)S[WX]r$",
517                        "^ANDS[WX]ri$",
518                        "^(AND|BIC)S[WX]rr$")>;
519
520// ALU, extend and shift
521def : SchedAlias<WriteIEReg, V1Write_2c_1M>;
522
523// Arithmetic, LSL shift, shift <= 4
524// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
525def V1WriteISReg : SchedWriteVariant<
526                     [SchedVar<IsCheapLSL,  [V1Write_1c_1I]>,
527                      SchedVar<NoSchedPred, [V1Write_2c_1M]>]>;
528def              : SchedAlias<WriteISReg, V1WriteISReg>;
529
530// Arithmetic, flagset, LSL shift, shift <= 4
531// Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4
532def V1WriteISRegS : SchedWriteVariant<
533                      [SchedVar<IsCheapLSL,  [V1Write_1c_1J]>,
534                       SchedVar<NoSchedPred, [V1Write_2c_1M]>]>;
535def               : InstRW<[V1WriteISRegS],
536                           (instregex "^(ADD|SUB)S(([WX]r[sx])|Xrx64)$")>;
537
538// Logical, shift, no flagset
539def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
540
541// Logical, shift, flagset
542def : InstRW<[V1Write_2c_1M], (instregex "^(AND|BIC)S[WX]rs$")>;
543
544// Flag manipulation instructions
545def : InstRW<[V1Write_1c_1J], (instrs SETF8, SETF16, RMIF, CFINV)>;
546
547
548// Divide and multiply instructions
549// -----------------------------------------------------------------------------
550
551// Divide
552def : SchedAlias<WriteID32, V1Write_12c5_1M0>;
553def : SchedAlias<WriteID64, V1Write_20c5_1M0>;
554
555// Multiply
556// Multiply accumulate
557// Multiply accumulate, long
558// Multiply long
559def V1WriteIM : SchedWriteVariant<
560                  [SchedVar<NeoverseMULIdiomPred, [V1Write_2c_1M]>,
561                   SchedVar<NoSchedPred,          [V1Write_2c_1M0]>]>;
562def           : SchedAlias<WriteIM32, V1WriteIM>;
563def           : SchedAlias<WriteIM64, V1WriteIM>;
564
565// Multiply high
566def : InstRW<[V1Write_3c_1M, ReadIM, ReadIM], (instrs SMULHrr, UMULHrr)>;
567
568
569// Pointer Authentication Instructions (v8.3 PAC)
570// -----------------------------------------------------------------------------
571
572// Authenticate data address
573// Authenticate instruction address
574// Compute pointer authentication code for data address
575// Compute pointer authentication code, using generic key
576// Compute pointer authentication code for instruction address
577def : InstRW<[V1Write_5c_1M0], (instregex "^AUT",
578                                          "^PAC")>;
579
580// Branch and link, register, with pointer authentication
581// Branch, register, with pointer authentication
582// Branch, return, with pointer authentication
583def : InstRW<[V1Write_6c_1B_1M0], (instregex "^BL?RA[AB]Z?$",
584                                             "^E?RETA[AB]$")>;
585
586// Load register, with pointer authentication
587def : InstRW<[V1Write_9c_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;
588
589// Strip pointer authentication code
590def : InstRW<[V1Write_2c_1M0], (instrs XPACD, XPACI, XPACLRI)>;
591
592
593// Miscellaneous data-processing instructions
594// -----------------------------------------------------------------------------
595
596// Bitfield extract, one reg
597// Bitfield extract, two regs
598def V1WriteExtr : SchedWriteVariant<
599                    [SchedVar<IsRORImmIdiomPred, [V1Write_1c_1I]>,
600                     SchedVar<NoSchedPred,       [V1Write_3c_1I_1M]>]>;
601def : SchedAlias<WriteExtr, V1WriteExtr>;
602
603// Bitfield move, basic
604// Variable shift
605def : SchedAlias<WriteIS, V1Write_1c_1I>;
606
607// Bitfield move, insert
608def : InstRW<[V1Write_2c_1M], (instregex "^BFM[WX]ri$")>;
609
610// Move immediate
611def : SchedAlias<WriteImm, V1Write_1c_1I>;
612
613
614// Load instructions
615// -----------------------------------------------------------------------------
616
617// Load register, immed offset
618def : SchedAlias<WriteLD, V1Write_4c_1L>;
619
620// Load register, immed offset, index
621def : SchedAlias<WriteLDIdx, V1Write_4c_1L>;
622def : SchedAlias<WriteAdr,   V1Write_1c_1I>;
623
624// Load pair, immed offset
625def : SchedAlias<WriteLDHi, V1Write_4c_1L>;
626def : InstRW<[V1Write_4c_1L, V1Write_0c_0Z], (instrs LDPWi, LDNPWi)>;
627def : InstRW<[V1Write_4c_1L, V1Write_0c_0Z, WriteAdr],
628             (instrs LDPWpost, LDPWpre)>;
629
630// Load pair, signed immed offset, signed words
631def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z], (instrs LDPSWi)>;
632
633// Load pair, immed post or pre-index, signed words
634def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z, WriteAdr],
635             (instrs LDPSWpost, LDPSWpre)>;
636
637
638// Store instructions
639// -----------------------------------------------------------------------------
640
641// Store register, immed offset
642def : SchedAlias<WriteST, V1Write_1c_1L01_1D>;
643
644// Store register, immed offset, index
645def : SchedAlias<WriteSTIdx, V1Write_1c_1L01_1D>;
646
647// Store pair, immed offset
648def : SchedAlias<WriteSTP, V1Write_1c_1L01_1D>;
649
650
651// FP data processing instructions
652// -----------------------------------------------------------------------------
653
654// FP absolute value
655// FP arithmetic
656// FP min/max
657// FP negate
658def : SchedAlias<WriteF, V1Write_2c_1V>;
659
660// FP compare
661def : SchedAlias<WriteFCmp, V1Write_2c_1V0>;
662
663// FP divide
664// FP square root
665def : SchedAlias<WriteFDiv, V1Write_10c7_1V02>;
666
667// FP divide, H-form
668// FP square root, H-form
669def : InstRW<[V1Write_7c7_1V02], (instrs FDIVHrr, FSQRTHr)>;
670
671// FP divide, S-form
672// FP square root, S-form
673def : InstRW<[V1Write_10c7_1V02], (instrs FDIVSrr, FSQRTSr)>;
674
675// FP divide, D-form
676def : InstRW<[V1Write_15c7_1V02], (instrs FDIVDrr)>;
677
678// FP square root, D-form
679def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTDr)>;
680
681// FP multiply
682def : SchedAlias<WriteFMul, V1Write_3c_1V>;
683
684// FP multiply accumulate
685def : InstRW<[V1Write_4c_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
686
687// FP round to integral
688def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$",
689                                           "^FRINT(32|64)[XZ][SD]r$")>;
690
691// FP select
692def : InstRW<[V1Write_2c_1V01], (instregex "^FCSEL[HSD]rrr$")>;
693
694
695// FP miscellaneous instructions
696// -----------------------------------------------------------------------------
697
698// FP convert, from gen to vec reg
699def : InstRW<[V1Write_3c_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
700
701// FP convert, from vec to gen reg
702def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;
703
704// FP convert, Javascript from vec to gen reg
705def : InstRW<[V1Write_3c_1V0], (instrs FJCVTZS)>;
706
707// FP convert, from vec to vec reg
708def : SchedAlias<WriteFCvt, V1Write_3c_1V02>;
709
710// FP move, immed
711def : SchedAlias<WriteFImm, V1Write_2c_1V>;
712
713// FP move, register
714def : InstRW<[V1Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
715
716// FP transfer, from gen to low half of vec reg
717def : InstRW<[V1Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
718
719// FP transfer, from gen to high half of vec reg
720def : InstRW<[V1Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
721
722// FP transfer, from vec to gen reg
723def : SchedAlias<WriteFCopy, V1Write_2c_1V1>;
724
725
726// FP load instructions
727// -----------------------------------------------------------------------------
728
729// Load vector reg, literal, S/D/Q forms
730// Load vector reg, unscaled immed
731// Load vector reg, unsigned immed
732def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[SDQ]l$",
733                                                      "^LDUR[BHSDQ]i$",
734                                                      "^LDR[BHSDQ]ui$")>;
735
736// Load vector reg, immed post-index
737// Load vector reg, immed pre-index
738def : InstRW<[V1Write_6c_1L, WriteAdr],
739             (instregex "^LDR[BHSDQ](post|pre)$")>;
740
741// Load vector reg, register offset, basic
742// Load vector reg, register offset, scale, S/D-form
743// Load vector reg, register offset, extend
744// Load vector reg, register offset, extend, scale, S/D-form
745def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;
746
747// Load vector reg, register offset, scale, H/Q-form
748// Load vector reg, register offset, extend, scale, H/Q-form
749def : InstRW<[V1Write_7c_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;
750
751// Load vector pair, immed offset, S/D-form
752def : InstRW<[V1Write_6c_1L, V1Write_0c_0Z], (instregex "^LDN?P[SD]i$")>;
753
754// Load vector pair, immed offset, Q-form
755def : InstRW<[V1Write_6c_1L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
756
757// Load vector pair, immed post-index, S/D-form
758// Load vector pair, immed pre-index, S/D-form
759def : InstRW<[V1Write_6c_1L, V1Write_0c_0Z, WriteAdr],
760             (instregex "^LDP[SD](pre|post)$")>;
761
762// Load vector pair, immed post-index, Q-form
763// Load vector pair, immed pre-index, Q-form
764def : InstRW<[V1Write_6c_1L, WriteLDHi, WriteAdr],
765             (instrs LDPQpost, LDPQpre)>;
766
767
768// FP store instructions
769// -----------------------------------------------------------------------------
770
771// Store vector reg, unscaled immed, B/H/S/D/Q-form
772def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>;
773
774// Store vector reg, immed post-index, B/H/S/D/Q-form
775// Store vector reg, immed pre-index, B/H/S/D/Q-form
776def : InstRW<[V1Write_2c_1L01_1V01, WriteAdr],
777             (instregex "^STR[BHSDQ](pre|post)$")>;
778
779// Store vector reg, unsigned immed, B/H/S/D/Q-form
780def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>;
781
782// Store vector reg, register offset, basic, B/S/D-form
783// Store vector reg, register offset, scale, B/S/D-form
784// Store vector reg, register offset, extend, B/S/D-form
785// Store vector reg, register offset, extend, scale, B/S/D-form
786def : InstRW<[V1Write_2c_1L01_1V01, ReadAdrBase],
787             (instregex "^STR[BSD]ro[WX]$")>;
788
789// Store vector reg, register offset, basic, H/Q-form
790// Store vector reg, register offset, scale, H/Q-form
791// Store vector reg, register offset, extend, H/Q-form
792// Store vector reg, register offset, extend, scale, H/Q-form
793def : InstRW<[V1Write_2c_1I_1L01_1V01, ReadAdrBase],
794             (instregex "^STR[HQ]ro[WX]$")>;
795
796// Store vector pair, immed offset, S/D/Q-form
797def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STN?P[SDQ]i$")>;
798
799// Store vector pair, immed post-index, S/D-form
800// Store vector pair, immed pre-index, S/D-form
801def : InstRW<[V1Write_2c_1L01_1V01, WriteAdr],
802             (instregex "^STP[SD](pre|post)$")>;
803
804// Store vector pair, immed post-index, Q-form
805// Store vector pair, immed pre-index, Q-form
806def : InstRW<[V1Write_2c_2L01_1V01, WriteAdr], (instrs STPQpre, STPQpost)>;
807
808
809// ASIMD integer instructions
810// -----------------------------------------------------------------------------
811
812// ASIMD absolute diff
813// ASIMD absolute diff long
814// ASIMD arith, basic
815// ASIMD arith, complex
816// ASIMD arith, pair-wise
817// ASIMD compare
818// ASIMD logical
819// ASIMD max/min, basic and pair-wise
820def : SchedAlias<WriteVd, V1Write_2c_1V>;
821def : SchedAlias<WriteVq, V1Write_2c_1V>;
822
823// ASIMD absolute diff accum
824// ASIMD absolute diff accum long
825// ASIMD pairwise add and accumulate long
826def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]ABAL?v", "^[SU]ADALPv")>;
827
828// ASIMD arith, reduce, 4H/4S
829// ASIMD max/min, reduce, 4H/4S
830def : InstRW<[V1Write_2c_1V13], (instregex "^(ADD|[SU]ADDL)Vv4(i16|i32)v$",
831                                           "^[SU](MAX|MIN)Vv4(i16|i32)v$")>;
832
833// ASIMD arith, reduce, 8B/8H
834// ASIMD max/min, reduce, 8B/8H
835def : InstRW<[V1Write_4c_1V13_1V], (instregex "^(ADD|[SU]ADDL)Vv8(i8|i16)v$",
836                                              "^[SU](MAX|MIN)Vv8(i8|i16)v$")>;
837
838// ASIMD arith, reduce, 16B
839// ASIMD max/min, reduce, 16B
840def : InstRW<[V1Write_4c_2V13], (instregex "^(ADD|[SU]ADDL)Vv16i8v$",
841                                           "[SU](MAX|MIN)Vv16i8v$")>;
842
843// ASIMD dot product
844// ASIMD dot product using signed and unsigned integers
845def : InstRW<[V1Write_2c_1V], (instregex "^([SU]|SU|US)DOT(lane)?v(8|16)i8$")>;
846
847// ASIMD matrix multiply- accumulate
848def : InstRW<[V1Write_3c_1V], (instrs SMMLA, UMMLA, USMMLA)>;
849
850// ASIMD multiply
851// ASIMD multiply accumulate
852// ASIMD multiply accumulate long
853// ASIMD multiply accumulate high
854// ASIMD multiply accumulate saturating long
855def : InstRW<[V1Write_4c_1V02],
856             (instregex "^MUL(v[148]i16|v[124]i32)$",
857                        "^SQR?DMULH(v[48]i16|v[24]i32)$",
858                        "^ML[AS](v[148]i16|v[124]i32)$",
859                        "^[SU]ML[AS]Lv",
860                        "^SQRDML[AS]H(v[148]i16|v[124]i32)$",
861                        "^SQDML[AS]Lv")>;
862
863// ASIMD multiply/multiply long (8x8) polynomial
864def : InstRW<[V1Write_3c_1V01], (instregex "^PMULL?v(8|16)i8$")>;
865
866// ASIMD multiply long
867def : InstRW<[V1Write_3c_1V02], (instregex "^([SU]|SQD)MULLv")>;
868
869// ASIMD shift accumulate
870// ASIMD shift by immed, complex
871// ASIMD shift by register, complex
872def : InstRW<[V1Write_4c_1V13],
873             (instregex "^[SU]R?SRAv",
874                        "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$",
875                        "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
876                        "^SQSHU?RNv", "^[SU]RSHRv", "^UQR?SHRNv",
877                        "^[SU]Q?RSHLv", "^[SU]QSHLv")>;
878
879// ASIMD shift by immed, basic
880// ASIMD shift by immed and insert, basic
881// ASIMD shift by register, basic
882def : InstRW<[V1Write_2c_1V13], (instregex "^SHLL?v", "^SHRNv", "^[SU]SHLLv",
883                                          "^[SU]SHRv", "^S[LR]Iv", "^[SU]SHLv")>;
884
885
886// ASIMD FP instructions
887// -----------------------------------------------------------------------------
888
889// ASIMD FP absolute value/difference
890// ASIMD FP arith, normal
891// ASIMD FP compare
892// ASIMD FP complex add
893// ASIMD FP max/min, normal
894// ASIMD FP max/min, pairwise
895// ASIMD FP negate
896// Covered by "SchedAlias (WriteV[dq]...)" above
897
898// ASIMD FP complex multiply add
899// ASIMD FP multiply accumulate
900def : InstRW<[V1Write_4c_1V], (instregex "^FCADD(v[48]f16|v[24]f32|v2f64)$",
901                                         "^FML[AS]v")>;
902
903// ASIMD FP convert, long (F16 to F32)
904def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTLv[48]i16$")>;
905
906// ASIMD FP convert, long (F32 to F64)
907def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTLv[24]i32$")>;
908
909// ASIMD FP convert, narrow (F32 to F16)
910def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTNv[48]i16$")>;
911
912// ASIMD FP convert, narrow (F64 to F32)
913def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTNv[24]i32$",
914                                           "^FCVTXN(v[24]f32|v1i64)$")>;
915
916// ASIMD FP convert, other, D-form F32 and Q-form F64
917def : InstRW<[V1Write_3c_1V02], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
918                                           "^[SU]CVTFv2f(32|64)$")>;
919
920// ASIMD FP convert, other, D-form F16 and Q-form F32
921def : InstRW<[V1Write_4c_2V02], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
922                                           "^[SU]CVTFv4f(16|32)$")>;
923
924// ASIMD FP convert, other, Q-form F16
925def : InstRW<[V1Write_6c_4V02], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
926                                           "^[SU]CVTFv8f16$")>;
927
928// ASIMD FP divide, D-form, F16
929// ASIMD FP square root, D-form, F16
930def : InstRW<[V1Write_7c7_1V02], (instrs FDIVv4f16, FSQRTv4f16)>;
931
932// ASIMD FP divide, F32
933// ASIMD FP square root, F32
934def : InstRW<[V1Write_10c7_1V02], (instrs FDIVv2f32, FDIVv4f32,
935                                          FSQRTv2f32, FSQRTv4f32)>;
936
937// ASIMD FP divide, Q-form, F16
938def : InstRW<[V1Write_13c5_1V02], (instrs FDIVv8f16)>;
939
940// ASIMD FP divide, Q-form, F64
941def : InstRW<[V1Write_15c7_1V02], (instrs FDIVv2f64)>;
942
943// ASIMD FP square root, Q-form, F16
944def : InstRW<[V1Write_13c11_1V02], (instrs FSQRTv8f16)>;
945
946// ASIMD FP square root, Q-form, F64
947def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTv2f64)>;
948
949// ASIMD FP max/min, reduce, F32 and D-form F16
950def : InstRW<[V1Write_4c_2V], (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>;
951
952// ASIMD FP max/min, reduce, Q-form F16
953def : InstRW<[V1Write_6c_3V], (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>;
954
955// ASIMD FP multiply
956def : InstRW<[V1Write_3c_1V], (instregex "^FMULX?v")>;
957
958// ASIMD FP multiply accumulate long
959def : InstRW<[V1Write_5c_1V], (instregex "^FML[AS]L2?v")>;
960
961// ASIMD FP round, D-form F32 and Q-form F64
962def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ]v2f(32|64)$")>;
963
964// ASIMD FP round, D-form F16 and Q-form F32
965def : InstRW<[V1Write_4c_2V02], (instregex "^FRINT[AIMNPXZ]v4f(16|32)$")>;
966
967// ASIMD FP round, Q-form F16
968def : InstRW<[V1Write_6c_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
969
970
971// ASIMD BF instructions
972// -----------------------------------------------------------------------------
973
974// ASIMD convert, F32 to BF16
975def : InstRW<[V1Write_4c_1V02], (instrs BFCVTN, BFCVTN2)>;
976
977// ASIMD dot product
978def : InstRW<[V1Write_4c_1V], (instregex "^BF(DOT|16DOTlane)v[48]bf16$")>;
979
980// ASIMD matrix multiply accumulate
981def : InstRW<[V1Write_5c_1V], (instrs BFMMLA)>;
982
983// ASIMD multiply accumulate long
984def : InstRW<[V1Write_4c_1V], (instregex "^BFMLAL[BT](Idx)?$")>;
985
986// Scalar convert, F32 to BF16
987def : InstRW<[V1Write_3c_1V02], (instrs BFCVT)>;
988
989
990// ASIMD miscellaneous instructions
991// -----------------------------------------------------------------------------
992
993// ASIMD bit reverse
994// ASIMD bitwise insert
995// ASIMD count
996// ASIMD duplicate, element
997// ASIMD extract
998// ASIMD extract narrow
999// ASIMD insert, element to element
1000// ASIMD move, FP immed
1001// ASIMD move, integer immed
1002// ASIMD reverse
1003// ASIMD table lookup, 1 or 2 table regs
1004// ASIMD table lookup extension, 1 table reg
1005// ASIMD transfer, element to gen reg
1006// ASIMD transpose
1007// ASIMD unzip/zip
1008// Covered by "SchedAlias (WriteV[dq]...)" above
1009
1010// ASIMD duplicate, gen reg
1011def : InstRW<[V1Write_3c_1M0],
1012             (instregex "^DUP((v16|v8)i8|(v8|v4)i16|(v4|v2)i32|v2i64)gpr$")>;
1013
1014// ASIMD extract narrow, saturating
1015def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
1016
1017// ASIMD reciprocal and square root estimate, D-form U32
1018// ASIMD reciprocal and square root estimate, D-form F32 and F64
1019def : InstRW<[V1Write_3c_1V02], (instrs URECPEv2i32,
1020                                        URSQRTEv2i32,
1021                                        FRECPEv1i32, FRECPEv2f32, FRECPEv1i64,
1022                                        FRSQRTEv1i32, FRSQRTEv2f32, FRSQRTEv1i64)>;
1023
1024// ASIMD reciprocal and square root estimate, Q-form U32
1025// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 and F64
1026def : InstRW<[V1Write_4c_1V02], (instrs URECPEv4i32,
1027                                        URSQRTEv4i32,
1028                                        FRECPEv1f16, FRECPEv4f16,
1029                                        FRECPEv4f32, FRECPEv2f64,
1030                                        FRSQRTEv1f16, FRSQRTEv4f16,
1031                                        FRSQRTEv4f32, FRSQRTEv2f64)>;
1032
1033// ASIMD reciprocal and square root estimate, Q-form F16
1034def : InstRW<[V1Write_6c_2V02], (instrs FRECPEv8f16,
1035                                        FRSQRTEv8f16)>;
1036
1037// ASIMD reciprocal exponent
1038def : InstRW<[V1Write_3c_1V02], (instrs FRECPXv1f16, FRECPXv1i32, FRECPXv1i64)>;
1039
1040// ASIMD reciprocal step
1041def : InstRW<[V1Write_4c_1V], (instregex "^FRECPS(16|32|64)$", "^FRECPSv",
1042                                         "^FRSQRTS(16|32|64)$", "^FRSQRTSv")>;
1043
1044// ASIMD table lookup, 1 or 2 table regs
1045// ASIMD table lookup extension, 1 table reg
1046def : InstRW<[V1Write_2c_2V01], (instregex "^TBLv(8|16)i8(One|Two)$",
1047                                           "^TBXv(8|16)i8One$")>;
1048
1049// ASIMD table lookup, 3 table regs
1050// ASIMD table lookup extension, 2 table reg
1051def : InstRW<[V1Write_4c_2V01], (instrs TBLv8i8Three, TBLv16i8Three,
1052                                        TBXv8i8Two, TBXv16i8Two)>;
1053
1054// ASIMD table lookup, 4 table regs
1055def : InstRW<[V1Write_4c_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>;
1056
1057// ASIMD table lookup extension, 3 table reg
1058def : InstRW<[V1Write_6c_3V01], (instrs TBXv8i8Three, TBXv16i8Three)>;
1059
1060// ASIMD table lookup extension, 4 table reg
1061def : InstRW<[V1Write_6c_5V01], (instrs TBXv8i8Four, TBXv16i8Four)>;
1062
1063// ASIMD transfer, element to gen reg
1064def : InstRW<[V1Write_2c_1V], (instregex "^SMOVvi(((8|16)to(32|64))|32to64)$",
1065                                         "^UMOVvi(8|16|32|64)$")>;
1066
1067// ASIMD transfer, gen reg to element
1068def : InstRW<[V1Write_5c_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
1069
1070
1071// ASIMD load instructions
1072// -----------------------------------------------------------------------------
1073
1074// ASIMD load, 1 element, multiple, 1 reg
1075def : InstRW<[V1Write_6c_1L],
1076             (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1077def : InstRW<[V1Write_6c_1L, WriteAdr],
1078             (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1079
1080// ASIMD load, 1 element, multiple, 2 reg
1081def : InstRW<[V1Write_6c_2L],
1082             (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1083def : InstRW<[V1Write_6c_2L, WriteAdr],
1084             (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1085
1086// ASIMD load, 1 element, multiple, 3 reg
1087def : InstRW<[V1Write_6c_3L],
1088             (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1089def : InstRW<[V1Write_6c_3L, WriteAdr],
1090             (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1091
1092// ASIMD load, 1 element, multiple, 4 reg, D-form
1093def : InstRW<[V1Write_6c_2L],
1094             (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
1095def : InstRW<[V1Write_6c_2L, WriteAdr],
1096             (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
1097
1098// ASIMD load, 1 element, multiple, 4 reg, Q-form
1099def : InstRW<[V1Write_7c_4L],
1100             (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
1101def : InstRW<[V1Write_7c_4L, WriteAdr],
1102             (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
1103
1104// ASIMD load, 1 element, one lane
1105// ASIMD load, 1 element, all lanes
1106def : InstRW<[V1Write_8c_1L_1V],
1107             (instregex "^LD1(i|Rv)(8|16|32|64)$",
1108                        "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1109def : InstRW<[V1Write_8c_1L_1V, WriteAdr],
1110             (instregex "^LD1i(8|16|32|64)_POST$",
1111                        "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1112
1113// ASIMD load, 2 element, multiple, D-form
1114def : InstRW<[V1Write_8c_1L_2V],
1115             (instregex "^LD2Twov(8b|4h|2s)$")>;
1116def : InstRW<[V1Write_8c_1L_2V, WriteAdr],
1117             (instregex "^LD2Twov(8b|4h|2s)_POST$")>;
1118
1119// ASIMD load, 2 element, multiple, Q-form
1120def : InstRW<[V1Write_8c_2L_2V],
1121             (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
1122def : InstRW<[V1Write_8c_2L_2V, WriteAdr],
1123             (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
1124
1125// ASIMD load, 2 element, one lane
1126// ASIMD load, 2 element, all lanes
1127def : InstRW<[V1Write_8c_1L_2V],
1128             (instregex "^LD2i(8|16|32|64)$",
1129                        "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1130def : InstRW<[V1Write_8c_1L_2V, WriteAdr],
1131             (instregex "^LD2i(8|16|32|64)_POST$",
1132                        "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1133
1134// ASIMD load, 3 element, multiple, D-form
1135// ASIMD load, 3 element, one lane
1136// ASIMD load, 3 element, all lanes
1137def : InstRW<[V1Write_8c_2L_3V],
1138             (instregex "^LD3Threev(8b|4h|2s)$",
1139                        "^LD3i(8|16|32|64)$",
1140                        "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1141def : InstRW<[V1Write_8c_2L_3V, WriteAdr],
1142             (instregex "^LD3Threev(8b|4h|2s)_POST$",
1143                        "^LD3i(8|16|32|64)_POST$",
1144                        "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1145
1146// ASIMD load, 3 element, multiple, Q-form
1147def : InstRW<[V1Write_8c_3L_3V],
1148             (instregex "^LD3Threev(16b|8h|4s|2d)$")>;
1149def : InstRW<[V1Write_8c_3L_3V, WriteAdr],
1150             (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>;
1151
1152// ASIMD load, 4 element, multiple, D-form
1153// ASIMD load, 4 element, one lane
1154// ASIMD load, 4 element, all lanes
1155def : InstRW<[V1Write_8c_3L_4V],
1156             (instregex "^LD4Fourv(8b|4h|2s)$",
1157                        "^LD4i(8|16|32|64)$",
1158                        "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1159def : InstRW<[V1Write_8c_3L_4V, WriteAdr],
1160             (instregex "^LD4Fourv(8b|4h|2s)_POST$",
1161                        "^LD4i(8|16|32|64)_POST$",
1162                        "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1163
1164// ASIMD load, 4 element, multiple, Q-form
1165def : InstRW<[V1Write_9c_4L_4V],
1166             (instregex "^LD4Fourv(16b|8h|4s|2d)$")>;
1167def : InstRW<[V1Write_9c_4L_4V, WriteAdr],
1168             (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>;
1169
1170
1171// ASIMD store instructions
1172// -----------------------------------------------------------------------------
1173
1174// ASIMD store, 1 element, multiple, 1 reg
1175// ASIMD store, 1 element, multiple, 2 reg, D-form
1176def : InstRW<[V1Write_2c_1L01_1V01],
1177             (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$",
1178                        "^ST1Twov(8b|4h|2s|1d)$")>;
1179def : InstRW<[V1Write_2c_1L01_1V01, WriteAdr],
1180             (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$",
1181                        "^ST1Twov(8b|4h|2s|1d)_POST$")>;
1182
1183// ASIMD store, 1 element, multiple, 2 reg, Q-form
1184// ASIMD store, 1 element, multiple, 3 reg, D-form
1185// ASIMD store, 1 element, multiple, 4 reg, D-form
1186def : InstRW<[V1Write_2c_2L01_2V01],
1187             (instregex "^ST1Twov(16b|8h|4s|2d)$",
1188                        "^ST1Threev(8b|4h|2s|1d)$",
1189                        "^ST1Fourv(8b|4h|2s|1d)$")>;
1190def : InstRW<[V1Write_2c_2L01_2V01, WriteAdr],
1191             (instregex "^ST1Twov(16b|8h|4s|2d)_POST$",
1192                        "^ST1Threev(8b|4h|2s|1d)_POST$",
1193                        "^ST1Fourv(8b|4h|2s|1d)_POST$")>;
1194
1195// ASIMD store, 1 element, multiple, 3 reg, Q-form
1196def : InstRW<[V1Write_2c_3L01_3V01],
1197             (instregex "^ST1Threev(16b|8h|4s|2d)$")>;
1198def : InstRW<[V1Write_2c_3L01_3V01, WriteAdr],
1199             (instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>;
1200
1201// ASIMD store, 1 element, multiple, 4 reg, Q-form
1202def : InstRW<[V1Write_2c_4L01_4V01],
1203             (instregex "^ST1Fourv(16b|8h|4s|2d)$")>;
1204def : InstRW<[V1Write_2c_4L01_4V01, WriteAdr],
1205             (instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>;
1206
1207// ASIMD store, 1 element, one lane
1208// ASIMD store, 2 element, multiple, D-form
1209// ASIMD store, 2 element, one lane
1210def : InstRW<[V1Write_4c_1L01_1V01],
1211             (instregex "^ST1i(8|16|32|64)$",
1212                        "^ST2Twov(8b|4h|2s)$",
1213                        "^ST2i(8|16|32|64)$")>;
1214def : InstRW<[V1Write_4c_1L01_1V01, WriteAdr],
1215             (instregex "^ST1i(8|16|32|64)_POST$",
1216                        "^ST2Twov(8b|4h|2s)_POST$",
1217                        "^ST2i(8|16|32|64)_POST$")>;
1218
1219// ASIMD store, 2 element, multiple, Q-form
1220// ASIMD store, 3 element, multiple, D-form
1221// ASIMD store, 3 element, one lane
1222// ASIMD store, 4 element, one lane, D
1223def : InstRW<[V1Write_4c_2L01_2V01],
1224             (instregex "^ST2Twov(16b|8h|4s|2d)$",
1225                        "^ST3Threev(8b|4h|2s)$",
1226                        "^ST3i(8|16|32|64)$",
1227                        "^ST4i64$")>;
1228def : InstRW<[V1Write_4c_2L01_2V01, WriteAdr],
1229             (instregex "^ST2Twov(16b|8h|4s|2d)_POST$",
1230                        "^ST3Threev(8b|4h|2s)_POST$",
1231                        "^ST3i(8|16|32|64)_POST$",
1232                        "^ST4i64_POST$")>;
1233
1234// ASIMD store, 3 element, multiple, Q-form
1235def : InstRW<[V1Write_5c_3L01_3V01],
1236             (instregex "^ST3Threev(16b|8h|4s|2d)$")>;
1237def : InstRW<[V1Write_5c_3L01_3V01, WriteAdr],
1238             (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>;
1239
1240// ASIMD store, 4 element, multiple, D-form
1241def : InstRW<[V1Write_6c_3L01_3V01],
1242             (instregex "^ST4Fourv(8b|4h|2s)$")>;
1243def : InstRW<[V1Write_6c_3L01_3V01, WriteAdr],
1244             (instregex "^ST4Fourv(8b|4h|2s)_POST$")>;
1245
1246// ASIMD store, 4 element, multiple, Q-form, B/H/S
1247def : InstRW<[V1Write_7c_6L01_6V01],
1248             (instregex "^ST4Fourv(16b|8h|4s)$")>;
1249def : InstRW<[V1Write_7c_6L01_6V01, WriteAdr],
1250             (instregex "^ST4Fourv(16b|8h|4s)_POST$")>;
1251
1252// ASIMD store, 4 element, multiple, Q-form, D
1253def : InstRW<[V1Write_4c_4L01_4V01],
1254             (instrs ST4Fourv2d)>;
1255def : InstRW<[V1Write_4c_4L01_4V01, WriteAdr],
1256             (instrs ST4Fourv2d_POST)>;
1257
1258// ASIMD store, 4 element, one lane, B/H/S
1259def : InstRW<[V1Write_6c_3L_3V],
1260             (instregex "^ST4i(8|16|32)$")>;
1261def : InstRW<[V1Write_6c_3L_3V, WriteAdr],
1262             (instregex "^ST4i(8|16|32)_POST$")>;
1263
1264
1265// Cryptography extensions
1266// -----------------------------------------------------------------------------
1267
1268// Crypto polynomial (64x64) multiply long
1269// Covered by "SchedAlias (WriteV[dq]...)" above
1270
1271// Crypto AES ops
1272def V1WriteVC : WriteSequence<[V1Write_2c_1V]>;
1273def V1ReadVC  : SchedReadAdvance<2, [V1WriteVC]>;
1274def           : InstRW<[V1WriteVC], (instrs AESDrr, AESErr)>;
1275def           : InstRW<[V1Write_2c_1V, V1ReadVC], (instrs AESMCrr, AESIMCrr)>;
1276
1277// Crypto SHA1 hash acceleration op
1278// Crypto SHA1 schedule acceleration ops
1279// Crypto SHA256 schedule acceleration ops
1280// Crypto SHA512 hash acceleration ops
1281// Crypto SM3 ops
1282def : InstRW<[V1Write_2c_1V0], (instregex "^SHA1(H|SU[01])rr$",
1283                                          "^SHA256SU[01]rr$",
1284                                          "^SHA512(H2?|SU[01])$",
1285                                          "^SM3(PARTW(1|2SM3SS1)|TT[12][AB])$")>;
1286
1287// Crypto SHA1 hash acceleration ops
1288// Crypto SHA256 hash acceleration ops
1289// Crypto SM4 ops
1290def : InstRW<[V1Write_4c_1V0], (instregex "^SHA1[CMP]rrr$",
1291                                          "^SHA256H2?rrr$",
1292                                          "^SM4E(KEY)?$")>;
1293
1294// Crypto SHA3 ops
1295def : InstRW<[V1Write_2c_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;
1296
1297
1298// CRC instruction
1299// -----------------------------------------------------------------------------
1300
1301// CRC checksum ops
1302def : InstRW<[V1Write_2c_1M0], (instregex "^CRC32C?[BHWX]rr$")>;
1303
1304
1305// SVE Predicate instructions
1306// -----------------------------------------------------------------------------
1307
1308// Loop control, based on predicate
1309def : InstRW<[V1Write_2c_1M0], (instregex "^BRK[AB]_PP[mz]P$")>;
1310def : InstRW<[V1Write_2c_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>;
1311
1312// Loop control, based on predicate and flag setting
1313def : InstRW<[V1Write_3c_2M0], (instrs BRKAS_PPzP, BRKBS_PPzP, BRKNS_PPzP,
1314                                       BRKPAS_PPzPP, BRKPBS_PPzPP)>;
1315
1316// Loop control, based on GPR
1317def : InstRW<[V1Write_3c_2M0], (instregex "^WHILE(LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;
1318
1319// Loop terminate
1320def : InstRW<[V1Write_1c_1M0], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;
1321
1322// Predicate counting scalar
1323// Predicate counting scalar, active predicate
1324def : InstRW<[V1Write_2c_1M0], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
1325def : InstRW<[V1Write_2c_1M0], (instregex "^(CNT|([SU]Q)?(DEC|INC))[BHWD]_XPiI$",
1326                                          "^SQ(DEC|INC)[BHWD]_XPiWdI$",
1327                                          "^UQ(DEC|INC)[BHWD]_WPiI$",
1328                                          "^CNTP_XPP_[BHSD]$",
1329                                          "^([SU]Q)?(DEC|INC)P_XP_[BHSD]$",
1330                                          "^UQ(DEC|INC)P_WP_[BHSD]$",
1331                                          "^[SU]Q(DEC|INC)P_XPWd_[BHSD]$")>;
1332
1333// Predicate counting vector, active predicate
1334def : InstRW<[V1Write_7c_2M0_1V01], (instregex "^([SU]Q)?(DEC|INC)P_ZP_[HSD]$")>;
1335
1336// Predicate logical
1337def : InstRW<[V1Write_1c_1M0],
1338             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;
1339
1340// Predicate logical, flag setting
1341def : InstRW<[V1Write_2c_2M0],
1342             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)S_PPzPP$")>;
1343
1344// Predicate reverse
1345// Predicate set/initialize/find next
1346// Predicate transpose
1347// Predicate unpack and widen
1348// Predicate zip/unzip
1349def : InstRW<[V1Write_2c_1M0], (instregex "^REV_PP_[BHSD]$",
1350                                          "^PFALSE$", "^PFIRST_B$",
1351                                          "^PNEXT_[BHSD]$", "^PTRUE_[BHSD]$",
1352                                          "^TRN[12]_PPP_[BHSDQ]$",
1353                                          "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;
1354
1355// Predicate set/initialize/find next
1356// Predicate unpack and widen
1357def : InstRW<[V1Write_2c_1M0], (instrs PTEST_PP,
1358                                       PUNPKHI_PP, PUNPKLO_PP)>;
1359
1360// Predicate select
1361def : InstRW<[V1Write_1c_1M0], (instrs SEL_PPPP)>;
1362
1363// Predicate set/initialize, set flags
1364def : InstRW<[V1Write_3c_2M0], (instregex "^PTRUES_[BHSD]$")>;
1365
1366
1367
1368// SVE integer instructions
1369// -----------------------------------------------------------------------------
1370
1371// Arithmetic, basic
1372// Logical
1373def : InstRW<[V1Write_2c_1V01],
1374             (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]$",
1375                        "^(ADD|SUB)_Z(I|P[mZ]Z|ZZ)_[BHSD]$",
1376                        "^ADR_[SU]XTW_ZZZ_D_[0123]$",
1377                        "^ADR_LSL_ZZZ_[SD]_[0123]$",
1378                        "^[SU]ABD_ZP[mZ]Z_[BHSD]$",
1379                        "^[SU](MAX|MIN)_Z(I|P[mZ]Z)_[BHSD]$",
1380                        "^[SU]Q(ADD|SUB)_Z(I|ZZ)_[BHSD]$",
1381                        "^SUBR_Z(I|P[mZ]Z)_[BHSD]$",
1382                        "^(AND|EOR|ORR)_ZI$",
1383                        "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$",
1384                        "^EOR(BT|TB)_ZZZ_[BHSD]$",
1385                        "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>;
1386
1387// Arithmetic, shift
1388def : InstRW<[V1Write_2c_1V1],
1389             (instregex "^(ASR|LSL|LSR)_WIDE_Z(Pm|Z)Z_[BHS]",
1390                        "^(ASR|LSL|LSR)_ZPm[IZ]_[BHSD]",
1391                        "^(ASR|LSL|LSR)_ZZI_[BHSD]",
1392                        "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
1393                        "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
1394
1395// Arithmetic, shift right for divide
1396def : InstRW<[V1Write_4c_1V1], (instregex "^ASRD_ZP[mZ]I_[BHSD]$")>;
1397
1398// Count/reverse bits
1399def : InstRW<[V1Write_2c_1V01], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>;
1400
1401// Broadcast logical bitmask immediate to vector
1402def : InstRW<[V1Write_2c_1V01], (instrs DUPM_ZI)>;
1403
1404// Compare and set flags
1405def : InstRW<[V1Write_4c_1M0_1V0],
1406             (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
1407                        "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;
1408
1409// Conditional extract operations, scalar form
1410def : InstRW<[V1Write_9c_1M0_1V1], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
1411
1412// Conditional extract operations, SIMD&FP scalar and vector forms
1413def : InstRW<[V1Write_3c_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
1414                                          "^COMPACT_ZPZ_[SD]$",
1415                                          "^SPLICE_ZPZZ?_[BHSD]$")>;
1416
1417// Convert to floating point, 64b to float or convert to double
1418def : InstRW<[V1Write_3c_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
1419                                          "^[SU]CVTF_ZPmZ_StoD")>;
1420
1421// Convert to floating point, 32b to single or half
1422def : InstRW<[V1Write_4c_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>;
1423
1424// Convert to floating point, 16b to half
1425def : InstRW<[V1Write_6c_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>;
1426
1427// Copy, scalar
1428def : InstRW<[V1Write_5c_1M0_1V01], (instregex "^CPY_ZPmR_[BHSD]$")>;
1429
1430// Copy, scalar SIMD&FP or imm
1431def : InstRW<[V1Write_2c_1V01], (instregex "^CPY_ZP([mz]I|mV)_[BHSD]$")>;
1432
1433// Divides, 32 bit
1434def : InstRW<[V1Write_12c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>;
1435
1436// Divides, 64 bit
1437def : InstRW<[V1Write_20c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>;
1438
1439// Dot product, 8 bit
1440def : InstRW<[V1Write_3c_1V01], (instregex "^[SU]DOT_ZZZI?_S$")>;
1441
1442// Dot product, 8 bit, using signed and unsigned integers
1443def : InstRW<[V1Write_3c_1V], (instrs SUDOT_ZZZI, USDOT_ZZZ, USDOT_ZZZI)>;
1444
1445// Dot product, 16 bit
1446def : InstRW<[V1Write_4c_1V01], (instregex "^[SU]DOT_ZZZI?_D$")>;
1447
1448// Duplicate, immediate and indexed form
1449def : InstRW<[V1Write_2c_1V01], (instregex "^DUP_ZI_[BHSD]$",
1450                                           "^DUP_ZZI_[BHSDQ]$")>;
1451
1452// Duplicate, scalar form
1453def : InstRW<[V1Write_3c_1M0], (instregex "^DUP_ZR_[BHSD]$")>;
1454
1455// Extend, sign or zero
1456def : InstRW<[V1Write_2c_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]$",
1457                                          "^[SU]XTH_ZPmZ_[SD]$",
1458                                          "^[SU]XTW_ZPmZ_[D]$")>;
1459
1460// Extract
1461def : InstRW<[V1Write_2c_1V01], (instrs EXT_ZZI)>;
1462
1463// Extract/insert operation, SIMD and FP scalar form
1464def : InstRW<[V1Write_3c_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$",
1465                                          "^INSR_ZV_[BHSD]$")>;
1466
1467// Extract/insert operation, scalar
1468def : InstRW<[V1Write_6c_1M0_1V1], (instregex "^LAST[AB]_RPZ_[BHSD]$",
1469                                              "^INSR_ZR_[BHSD]$")>;
1470
1471// Horizontal operations, B, H, S form, imm, imm
1472def : InstRW<[V1Write_4c_1V0], (instregex "^INDEX_II_[BHS]$")>;
1473
1474// Horizontal operations, B, H, S form, scalar, imm / scalar / imm, scalar
1475def : InstRW<[V1Write_7c_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
1476
1477// Horizontal operations, D form, imm, imm
1478def : InstRW<[V1Write_5c_2V0], (instrs INDEX_II_D)>;
1479
1480// Horizontal operations, D form, scalar, imm / scalar / imm, scalar
1481def : InstRW<[V1Write_8c_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>;
1482
1483// Move prefix
1484def : InstRW<[V1Write_2c_1V01], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
1485                                           "^MOVPRFX_ZZ$")>;
1486
1487// Matrix multiply-accumulate
1488def : InstRW<[V1Write_3c_1V01], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
1489
1490// Multiply, B, H, S element size
1491def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ)_[BHS]$",
1492                                          "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>;
1493
1494// Multiply, D element size
1495// Multiply accumulate, D element size
1496def : InstRW<[V1Write_5c_2V0], (instregex "^MUL_(ZI|ZPmZ)_D$",
1497                                          "^[SU]MULH_ZPmZ_D$",
1498                                          "^(MLA|MLS|MAD|MSB)_ZPmZZ_D$")>;
1499
1500// Multiply accumulate, B, H, S element size
1501// NOTE: This is not specified in the SOG.
1502def : InstRW<[V1Write_4c_1V0], (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;
1503
1504// Predicate counting vector
1505def : InstRW<[V1Write_2c_1V0], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>;
1506
1507// Reduction, arithmetic, B form
1508def : InstRW<[V1Write_14c_1V_1V0_2V1_1V13],
1509             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
1510
1511// Reduction, arithmetic, H form
1512def : InstRW<[V1Write_12c_1V_1V01_2V1],
1513             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
1514
1515// Reduction, arithmetic, S form
1516def : InstRW<[V1Write_10c_1V_1V01_2V1],
1517             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
1518
1519// Reduction, arithmetic, D form
1520def : InstRW<[V1Write_8c_1V_1V01],
1521             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
1522
1523// Reduction, logical
1524def : InstRW<[V1Write_12c_4V01], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>;
1525
1526// Reverse, vector
1527def : InstRW<[V1Write_2c_1V01], (instregex "^REV_ZZ_[BHSD]$",
1528                                           "^REVB_ZPmZ_[HSD]$",
1529                                           "^REVH_ZPmZ_[SD]$",
1530                                           "^REVW_ZPmZ_D$")>;
1531
1532// Select, vector form
1533// Table lookup
1534// Table lookup extension
1535// Transpose, vector form
1536// Unpack and extend
1537// Zip/unzip
1538def : InstRW<[V1Write_2c_1V01], (instregex "^SEL_ZPZZ_[BHSD]$",
1539                                           "^TB[LX]_ZZZ_[BHSD]$",
1540                                           "^TRN[12]_ZZZ_[BHSDQ]$",
1541                                           "^[SU]UNPK(HI|LO)_ZZ_[HSD]$",
1542                                           "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
1543
1544
1545// SVE floating-point instructions
1546// -----------------------------------------------------------------------------
1547
1548// Floating point absolute value/difference
1549// Floating point arithmetic
1550def : InstRW<[V1Write_2c_1V01], (instregex "^FAB[SD]_ZPmZ_[HSD]$",
1551                                           "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$",
1552                                           "^FADDP_ZPmZZ_[HSD]$",
1553                                           "^FNEG_ZPmZ_[HSD]$",
1554                                           "^FSUBR_ZPm[IZ]_[HSD]$")>;
1555
1556// Floating point associative add, F16
1557def : InstRW<[V1Write_19c_18V0], (instrs FADDA_VPZ_H)>;
1558
1559// Floating point associative add, F32
1560def : InstRW<[V1Write_11c_10V0], (instrs FADDA_VPZ_S)>;
1561
1562// Floating point associative add, F64
1563def : InstRW<[V1Write_8c_3V01], (instrs FADDA_VPZ_D)>;
1564
1565// Floating point compare
1566def : InstRW<[V1Write_2c_1V0], (instregex "^FAC(GE|GT)_PPzZZ_[HSD]$",
1567                                          "^FCM(EQ|GE|GT|NE|UO)_PPzZZ_[HSD]$",
1568                                          "^FCM(EQ|GE|GT|LE|LT|NE)_PPzZ0_[HSD]$")>;
1569
1570// Floating point complex add
1571def : InstRW<[V1Write_3c_1V01], (instregex "^FCADD_ZPmZ_[HSD]$")>;
1572
1573// Floating point complex multiply add
1574def : InstRW<[V1Write_5c_1V01], (instregex "^FCMLA_ZPmZZ_[HSD]$",
1575                                           "^FCMLA_ZZZI_[HS]$")>;
1576
1577// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
1578// Floating point convert to integer, F32
1579def : InstRW<[V1Write_4c_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)$",
1580                                          "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>;
1581
1582// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16)
1583// Floating point convert to integer, F64
1584def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$",
1585                                          "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>;
1586
1587// Floating point convert to integer, F16
1588def : InstRW<[V1Write_6c_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>;
1589
1590// Floating point copy
1591def : InstRW<[V1Write_2c_1V01], (instregex "^FCPY_ZPmI_[HSD]$",
1592                                           "^FDUP_ZI_[HSD]$")>;
1593
1594// Floating point divide, F16
1595def : InstRW<[V1Write_13c10_1V0], (instregex "^FDIVR?_ZPmZ_H$")>;
1596
1597// Floating point divide, F32
1598def : InstRW<[V1Write_10c7_1V0], (instregex "^FDIVR?_ZPmZ_S$")>;
1599
1600// Floating point divide, F64
1601def : InstRW<[V1Write_15c7_1V0], (instregex "^FDIVR?_ZPmZ_D$")>;
1602
1603// Floating point min/max
1604def : InstRW<[V1Write_2c_1V01], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>;
1605
1606// Floating point multiply
1607def : InstRW<[V1Write_3c_1V01], (instregex "^F(SCALE|MULX)_ZPmZ_[HSD]$",
1608                                           "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>;
1609
1610// Floating point multiply accumulate
1611// Floating point reciprocal step
1612def : InstRW<[V1Write_4c_1V01], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$",
1613                                           "^FML[AS]_ZZZI_[HSD]$",
1614                                           "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
1615
1616// Floating point reciprocal estimate, F16
1617def : InstRW<[V1Write_6c_4V0], (instrs FRECPE_ZZ_H, FRSQRTE_ZZ_H)>;
1618
1619// Floating point reciprocal estimate, F32
1620def : InstRW<[V1Write_4c_2V0], (instrs FRECPE_ZZ_S, FRSQRTE_ZZ_S)>;
1621
1622// Floating point reciprocal estimate, F64
1623def : InstRW<[V1Write_3c_1V0], (instrs FRECPE_ZZ_D, FRSQRTE_ZZ_D)>;
1624
1625// Floating point reciprocal exponent
1626def : InstRW<[V1Write_3c_1V0], (instregex "^FRECPX_ZPmZ_[HSD]$")>;
1627
1628// Floating point reduction, F16
1629def : InstRW<[V1Write_13c_6V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_H$")>;
1630
1631// Floating point reduction, F32
1632def : InstRW<[V1Write_11c_1V_5V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_S$")>;
1633
1634// Floating point reduction, F64
1635def : InstRW<[V1Write_9c_1V_4V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_D$")>;
1636
1637// Floating point round to integral, F16
1638def : InstRW<[V1Write_6c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>;
1639
1640// Floating point round to integral, F32
1641def : InstRW<[V1Write_4c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>;
1642
1643// Floating point round to integral, F64
1644def : InstRW<[V1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>;
1645
1646// Floating point square root, F16
1647def : InstRW<[V1Write_13c10_1V0], (instrs FSQRT_ZPmZ_H)>;
1648
1649// Floating point square root, F32
1650def : InstRW<[V1Write_10c7_1V0], (instrs FSQRT_ZPmZ_S)>;
1651
1652// Floating point square root, F64
1653def : InstRW<[V1Write_16c7_1V0], (instrs FSQRT_ZPmZ_D)>;
1654
1655// Floating point trigonometric
1656def : InstRW<[V1Write_3c_1V01], (instregex "^FEXPA_ZZ_[HSD]$",
1657                                           "^FTMAD_ZZI_[HSD]$",
1658                                           "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
1659
1660
1661// SVE BFloat16 (BF16) instructions
1662// -----------------------------------------------------------------------------
1663
1664// Convert, F32 to BF16
1665def : InstRW<[V1Write_4c_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
1666
1667// Dot product
1668def : InstRW<[V1Write_4c_1V01], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
1669
1670// Matrix multiply accumulate
1671def : InstRW<[V1Write_5c_1V01], (instrs BFMMLA_ZZZ)>;
1672
1673// Multiply accumulate long
1674def : InstRW<[V1Write_5c_1V01], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
1675
1676
1677// SVE Load instructions
1678// -----------------------------------------------------------------------------
1679
1680// Load vector
1681def : InstRW<[V1Write_6c_1L01], (instrs LDR_ZXI)>;
1682
1683// Load predicate
1684def : InstRW<[V1Write_6c_1L_1M], (instrs LDR_PXI)>;
1685
1686// Contiguous load, scalar + imm
1687// Contiguous load, scalar + scalar
1688// Contiguous load broadcast, scalar + imm
1689// Contiguous load broadcast, scalar + scalar
1690def : InstRW<[V1Write_6c_1L01], (instregex "^LD1[BHWD]_IMM_REAL$",
1691                                           "^LD1S?B_[HSD]_IMM_REAL$",
1692                                           "^LD1S?H_[SD]_IMM_REAL$",
1693                                           "^LD1S?W_D_IMM_REAL$",
1694                                           "^LD1[BWD]$",
1695                                           "^LD1S?B_[HSD]$",
1696                                           "^LD1S?W_D$",
1697                                           "^LD1R[BHWD]_IMM$",
1698                                           "^LD1RSW_IMM$",
1699                                           "^LD1RS?B_[HSD]_IMM$",
1700                                           "^LD1RS?H_[SD]_IMM$",
1701                                           "^LD1RS?W_D_IMM$",
1702                                           "^LD1RQ_[BHWD]_IMM$",
1703                                           "^LD1RQ_[BWD]$")>;
1704def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LD1H$",
1705                                              "^LD1S?H_[SD]$",
1706                                              "^LD1RQ_H$")>;
1707
1708// Non temporal load, scalar + imm
1709def : InstRW<[V1Write_6c_1L01], (instregex "^LDNT1[BHWD]_ZRI$")>;
1710
1711// Non temporal load, scalar + scalar
1712def : InstRW<[V1Write_7c_1L01_1S], (instrs LDNT1H_ZRR)>;
1713def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDNT1[BWD]_ZRR$")>;
1714
1715// Contiguous first faulting load, scalar + scalar
1716def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LDFF1H_REAL$",
1717                                              "^LDFF1S?H_[SD]_REAL$")>;
1718def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDFF1[BWD]_REAL$",
1719                                              "^LDFF1S?B_[HSD]_REAL$",
1720                                              "^LDFF1S?W_D_REAL$")>;
1721
1722// Contiguous non faulting load, scalar + imm
1723def : InstRW<[V1Write_6c_1L01], (instregex "^LDNF1[BHWD]_IMM_REAL$",
1724                                           "^LDNF1S?B_[HSD]_IMM_REAL$",
1725                                           "^LDNF1S?H_[SD]_IMM_REAL$",
1726                                           "^LDNF1S?W_D_IMM_REAL$")>;
1727
1728// Contiguous Load two structures to two vectors, scalar + imm
1729def : InstRW<[V1Write_8c_2L01_2V01], (instregex "^LD2[BHWD]_IMM$")>;
1730
1731// Contiguous Load two structures to two vectors, scalar + scalar
1732def : InstRW<[V1Write_10c_2L01_2V01], (instrs LD2H)>;
1733def : InstRW<[V1Write_9c_2L01_2V01],  (instregex "^LD2[BWD]$")>;
1734
1735// Contiguous Load three structures to three vectors, scalar + imm
1736def : InstRW<[V1Write_11c_3L01_3V01], (instregex "^LD3[BHWD]_IMM$")>;
1737
1738// Contiguous Load three structures to three vectors, scalar + scalar
1739def : InstRW<[V1Write_13c_3L01_1S_3V01], (instregex "^LD3[BHWD]$")>;
1740
1741// Contiguous Load four structures to four vectors, scalar + imm
1742def : InstRW<[V1Write_12c_4L01_4V01], (instregex "^LD4[BHWD]_IMM$")>;
1743
1744// Contiguous Load four structures to four vectors, scalar + scalar
1745def : InstRW<[V1Write_13c_4L01_2S_4V01], (instregex "^LD4[BHWD]$")>;
1746
1747// Gather load, vector + imm, 32-bit element size
1748def : InstRW<[V1Write_11c_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
1749                                             "^GLD(FF)?1W_IMM_REAL$")>;
1750
1751// Gather load, vector + imm, 64-bit element size
1752def : InstRW<[V1Write_9c_2L_2V],
1753             (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
1754                        "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?(SCALED_)?REAL$",
1755                        "^GLD(FF)?1D_IMM_REAL$",
1756                        "^GLD(FF)?1D_([SU]XTW_)?(SCALED_)?REAL$")>;
1757
1758// Gather load, 32-bit scaled offset
1759def : InstRW<[V1Write_11c_2L_2V],
1760             (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
1761                        "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
1762
1763// Gather load, 32-bit unpacked unscaled offset
1764def : InstRW<[V1Write_9c_1L_1V],
1765             (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
1766                        "^GLD(FF)?1W_[SU]XTW_REAL$")>;
1767
1768// Prefetch
1769// NOTE: This is not specified in the SOG.
1770def : InstRW<[V1Write_4c_1L01], (instregex "^PRF[BHWD]")>;
1771
1772
1773// SVE Store instructions
1774// -----------------------------------------------------------------------------
1775
1776// Store from predicate reg
1777def : InstRW<[V1Write_1c_1L01], (instrs STR_PXI)>;
1778
1779// Store from vector reg
1780def : InstRW<[V1Write_2c_1L01_1V], (instrs STR_ZXI)>;
1781
1782// Contiguous store, scalar + imm
1783// Contiguous store, scalar + scalar
1784def : InstRW<[V1Write_2c_1L01_1V], (instregex "^ST1[BHWD]_IMM$",
1785                                              "^ST1B_[HSD]_IMM$",
1786                                              "^ST1H_[SD]_IMM$",
1787                                              "^ST1W_D_IMM$",
1788                                              "^ST1[BWD]$",
1789                                              "^ST1B_[HSD]$",
1790                                              "^ST1W_D$")>;
1791def : InstRW<[V1Write_2c_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>;
1792
1793// Contiguous store two structures from two vectors, scalar + imm
1794// Contiguous store two structures from two vectors, scalar + scalar
1795def : InstRW<[V1Write_4c_1L01_1V], (instregex "^ST2[BHWD]_IMM$",
1796                                              "^ST2[BWD]$")>;
1797def : InstRW<[V1Write_4c_1L01_1S_1V], (instrs ST2H)>;
1798
1799// Contiguous store three structures from three vectors, scalar + imm
1800def : InstRW<[V1Write_7c_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>;
1801
1802// Contiguous store three structures from three vectors, scalar + scalar
1803def : InstRW<[V1Write_7c_5L01_5S_5V], (instregex "^ST3[BHWD]$")>;
1804
1805// Contiguous store four structures from four vectors, scalar + imm
1806def : InstRW<[V1Write_11c_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>;
1807
1808// Contiguous store four structures from four vectors, scalar + scalar
1809def : InstRW<[V1Write_11c_9L01_9S_9V], (instregex "^ST4[BHWD]$")>;
1810
1811// Non temporal store, scalar + imm
1812// Non temporal store, scalar + scalar
1813def : InstRW<[V1Write_2c_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$",
1814                                              "^STNT1[BWD]_ZRR$")>;
1815def : InstRW<[V1Write_2c_1L01_1S_1V], (instrs STNT1H_ZRR)>;
1816
1817// Scatter store vector + imm 32-bit element size
1818// Scatter store, 32-bit scaled offset
1819// Scatter store, 32-bit unscaled offset
1820def : InstRW<[V1Write_10c_2L01_2V], (instregex "^SST1[BH]_S_IMM$",
1821                                               "^SST1W_IMM$",
1822                                               "^SST1(H_S|W)_[SU]XTW_SCALED$",
1823                                               "^SST1[BH]_S_[SU]XTW$",
1824                                               "^SST1W_[SU]XTW$")>;
1825
1826// Scatter store, 32-bit unpacked unscaled offset
1827// Scatter store, 32-bit unpacked scaled offset
1828def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$",
1829                                              "^SST1D_[SU]XTW$",
1830                                              "^SST1[HW]_D_[SU]XTW_SCALED$",
1831                                              "^SST1D_[SU]XTW_SCALED$")>;
1832
1833// Scatter store vector + imm 64-bit element size
1834// Scatter store, 64-bit scaled offset
1835// Scatter store, 64-bit unscaled offset
1836def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_IMM$",
1837                                              "^SST1D_IMM$",
1838                                              "^SST1[HW]_D_SCALED$",
1839                                              "^SST1D_SCALED$",
1840                                              "^SST1[BHW]_D$",
1841                                              "^SST1D$")>;
1842
1843
1844// SVE Miscellaneous instructions
1845// -----------------------------------------------------------------------------
1846
1847// Read first fault register, unpredicated
1848// Set first fault register
1849// Write to first fault register
1850def : InstRW<[V1Write_2c_1M0], (instrs RDFFR_P_REAL,
1851                                       SETFFR,
1852                                       WRFFR)>;
1853
1854// Read first fault register, predicated
1855def : InstRW<[V1Write_3c_2M0], (instrs RDFFR_PPz_REAL)>;
1856
1857// Read first fault register and set flags
1858def : InstRW<[V1Write_4c_1M], (instrs RDFFRS_PPz)>;
1859
1860
1861}
1862