1//=- AArch64SchedNeoverseN1.td - NeoverseN1 Scheduling Model -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the scheduling model for the Arm Neoverse N1 processors.
10//
11// References:
12// - "Arm Neoverse N1 Software Optimization Guide"
13// - https://en.wikichip.org/wiki/arm_holdings/microarchitectures/neoverse_n1
14//
15//===----------------------------------------------------------------------===//
16
17def NeoverseN1Model : SchedMachineModel {  // Machine model for Neoverse N1.
18  let IssueWidth            =   8; // Maximum micro-ops dispatch rate.
19  let MicroOpBufferSize     = 128; // NOTE: Copied from Cortex-A76.
20  let LoadLatency           =   4; // Optimistic load latency.
21  let MispredictPenalty     =  11; // Cycle cost of a mispredicted branch.
22  let LoopMicroOpBufferSize =  16; // NOTE: Copied from Cortex-A57.
23  let CompleteModel         =   1; // The model is declared complete.
24
25  list<Predicate> UnsupportedFeatures = !listconcat(PAUnsupported.F,
26                                                    SMEUnsupported.F,
27                                                    SVEUnsupported.F,
28                                                    [HasMTE]); // PA, SME, SVE lists plus MTE.
29}
30
31//===----------------------------------------------------------------------===//
32// Define each kind of processor resource and number available on Neoverse N1.
33// Instructions are first fetched and then decoded into internal macro-ops
34// (MOPs).  From there, the MOPs proceed through register renaming and dispatch
35// stages.  A MOP can be split into one or more micro-ops further down the
36// pipeline, after the decode stage.  Once dispatched, micro-ops wait for their
37// operands and issue out-of-order to one of the issue pipelines.  Each issue
38// pipeline can accept one micro-op per cycle.
39
40let SchedModel = NeoverseN1Model in {
41
42// Define the issue ports; the ProcResource argument is the number of pipes.
43def N1UnitB  : ProcResource<1>;  // Branch
44def N1UnitS  : ProcResource<2>;  // Integer single cycle 0/1
45def N1UnitM  : ProcResource<1>;  // Integer multicycle
46def N1UnitL  : ProcResource<2>;  // Load/Store 0/1
47def N1UnitD  : ProcResource<2>;  // Store data 0/1
48def N1UnitV0 : ProcResource<1>;  // FP/ASIMD 0
49def N1UnitV1 : ProcResource<1>;  // FP/ASIMD 1
50
51def N1UnitI : ProcResGroup<[N1UnitS, N1UnitM]>;    // Integer units (S or M)
52def N1UnitV : ProcResGroup<[N1UnitV0, N1UnitV1]>;  // FP/ASIMD units (V0 or V1)
53
54// Define commonly used read types.
55
56// No generic forwarding is provided for these types: every advance is 0.
57def : ReadAdvance<ReadI,       0>;
58def : ReadAdvance<ReadISReg,   0>;
59def : ReadAdvance<ReadIEReg,   0>;
60def : ReadAdvance<ReadIM,      0>;
61def : ReadAdvance<ReadIMA,     0>;
62def : ReadAdvance<ReadID,      0>;
63def : ReadAdvance<ReadExtrHi,  0>;
64def : ReadAdvance<ReadAdrBase, 0>;
65def : ReadAdvance<ReadST,      0>;
66def : ReadAdvance<ReadVLD,     0>;
67
68def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }  // Marked unsupported.
69def : WriteRes<WriteBarrier, []> { let Latency = 1; }      // No issue port listed.
70def : WriteRes<WriteHint,    []> { let Latency = 1; }      // No issue port listed.
71
72
73//===----------------------------------------------------------------------===//
74// Define generic 0 micro-op types (zero latency, zero micro-ops, no units)
75
76let Latency = 0, NumMicroOps = 0 in
77def N1Write_0c_0Z : SchedWriteRes<[]>;
78
79//===----------------------------------------------------------------------===//
80// Define generic 1 micro-op types, named N1Write_<latency>c[<resource cycles>]_1<unit>
81
82def N1Write_1c_1B     : SchedWriteRes<[N1UnitB]>  { let Latency = 1; }
83def N1Write_1c_1I     : SchedWriteRes<[N1UnitI]>  { let Latency = 1; }
84def N1Write_2c_1M     : SchedWriteRes<[N1UnitM]>  { let Latency = 2; }
85def N1Write_3c_1M     : SchedWriteRes<[N1UnitM]>  { let Latency = 3; }
86def N1Write_4c3_1M    : SchedWriteRes<[N1UnitM]>  { let Latency = 4;
87                                                    let ResourceCycles = [3]; }
88def N1Write_5c3_1M    : SchedWriteRes<[N1UnitM]>  { let Latency = 5;
89                                                    let ResourceCycles = [3]; }
90def N1Write_12c5_1M   : SchedWriteRes<[N1UnitM]>  { let Latency = 12;
91                                                    let ResourceCycles = [5]; }
92def N1Write_20c5_1M   : SchedWriteRes<[N1UnitM]>  { let Latency = 20;
93                                                    let ResourceCycles = [5]; }
94def N1Write_4c_1L     : SchedWriteRes<[N1UnitL]>  { let Latency = 4; }
95def N1Write_5c_1L     : SchedWriteRes<[N1UnitL]>  { let Latency = 5; }
96def N1Write_7c_1L     : SchedWriteRes<[N1UnitL]>  { let Latency = 7; }
97def N1Write_2c_1V     : SchedWriteRes<[N1UnitV]>  { let Latency = 2; }
98def N1Write_3c_1V     : SchedWriteRes<[N1UnitV]>  { let Latency = 3; }
99def N1Write_4c_1V     : SchedWriteRes<[N1UnitV]>  { let Latency = 4; }
100def N1Write_5c_1V     : SchedWriteRes<[N1UnitV]>  { let Latency = 5; }
101def N1Write_2c_1V0    : SchedWriteRes<[N1UnitV0]> { let Latency = 2; }
102def N1Write_3c_1V0    : SchedWriteRes<[N1UnitV0]> { let Latency = 3; }
103def N1Write_4c_1V0    : SchedWriteRes<[N1UnitV0]> { let Latency = 4; }
104def N1Write_7c7_1V0   : SchedWriteRes<[N1UnitV0]> { let Latency = 7;
105                                                    let ResourceCycles = [7]; }
106def N1Write_10c7_1V0  : SchedWriteRes<[N1UnitV0]> { let Latency = 10;
107                                                    let ResourceCycles = [7]; }
108def N1Write_13c10_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 13;
109                                                    let ResourceCycles = [10]; }
110def N1Write_15c7_1V0  : SchedWriteRes<[N1UnitV0]> { let Latency = 15;
111                                                    let ResourceCycles = [7]; }
112def N1Write_17c7_1V0  : SchedWriteRes<[N1UnitV0]> { let Latency = 17;
113                                                    let ResourceCycles = [7]; }
114def N1Write_2c_1V1    : SchedWriteRes<[N1UnitV1]> { let Latency = 2; }
115def N1Write_3c_1V1    : SchedWriteRes<[N1UnitV1]> { let Latency = 3; }
116def N1Write_4c_1V1    : SchedWriteRes<[N1UnitV1]> { let Latency = 4; }
117
118//===----------------------------------------------------------------------===//
119// Define generic 2 micro-op types (one listed unit per micro-op; <N>c is the latency)
120
121let Latency = 1, NumMicroOps = 2 in
122def N1Write_1c_1B_1I   : SchedWriteRes<[N1UnitB, N1UnitI]>;
123let Latency = 3, NumMicroOps = 2 in
124def N1Write_3c_1I_1M   : SchedWriteRes<[N1UnitI, N1UnitM]>;
125let Latency = 2, NumMicroOps = 2 in
126def N1Write_2c_1I_1L   : SchedWriteRes<[N1UnitI, N1UnitL]>;
127let Latency = 5, NumMicroOps = 2 in
128def N1Write_5c_1I_1L   : SchedWriteRes<[N1UnitI, N1UnitL]>;
129let Latency = 6, NumMicroOps = 2 in
130def N1Write_6c_1I_1L   : SchedWriteRes<[N1UnitI, N1UnitL]>;
131let Latency = 7, NumMicroOps = 2 in
132def N1Write_7c_1I_1L   : SchedWriteRes<[N1UnitI, N1UnitL]>;
133let Latency = 5, NumMicroOps = 2 in
134def N1Write_5c_1M_1V   : SchedWriteRes<[N1UnitM, N1UnitV]>;
135let Latency = 6, NumMicroOps = 2 in
136def N1Write_6c_1M_1V0  : SchedWriteRes<[N1UnitM, N1UnitV0]>;
137let Latency = 5, NumMicroOps = 2 in
138def N1Write_5c_2L      : SchedWriteRes<[N1UnitL, N1UnitL]>;
139let Latency = 1, NumMicroOps = 2 in
140def N1Write_1c_1L_1D   : SchedWriteRes<[N1UnitL, N1UnitD]>;
141let Latency = 2, NumMicroOps = 2 in
142def N1Write_2c_1L_1V   : SchedWriteRes<[N1UnitL, N1UnitV]>;
143let Latency = 4, NumMicroOps = 2 in
144def N1Write_4c_1L_1V   : SchedWriteRes<[N1UnitL, N1UnitV]>;
145let Latency = 7, NumMicroOps = 2 in
146def N1Write_7c_1L_1V   : SchedWriteRes<[N1UnitL, N1UnitV]>;
147let Latency = 4, NumMicroOps = 2 in
148def N1Write_4c_1V0_1V1 : SchedWriteRes<[N1UnitV0, N1UnitV1]>;
149let Latency = 4, NumMicroOps = 2 in
150def N1Write_4c_2V0     : SchedWriteRes<[N1UnitV0, N1UnitV0]>;
151let Latency = 5, NumMicroOps = 2 in
152def N1Write_5c_2V0     : SchedWriteRes<[N1UnitV0, N1UnitV0]>;
153let Latency = 6, NumMicroOps = 2 in
154def N1Write_6c_2V1     : SchedWriteRes<[N1UnitV1, N1UnitV1]>;
155let Latency = 5, NumMicroOps = 2 in
156def N1Write_5c_1V1_1V  : SchedWriteRes<[N1UnitV1, N1UnitV]>;
157
158//===----------------------------------------------------------------------===//
159// Define generic 3 micro-op types
160
161let Latency = 2, NumMicroOps = 3 in  // 2-cycle write: one I, one L and one V micro-op.
162def N1Write_2c_1I_1L_1V : SchedWriteRes<[N1UnitI, N1UnitL, N1UnitV]>;
163let Latency = 1, NumMicroOps = 3 in  // two load/store micro-ops plus one store-data micro-op
164def N1Write_1c_2L_1D    : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitD]>;
165let Latency = 2, NumMicroOps = 3 in  // one load/store plus two FP/ASIMD micro-ops
166def N1Write_2c_1L_2V    : SchedWriteRes<[N1UnitL, N1UnitV, N1UnitV]>;
167let Latency = 6, NumMicroOps = 3 in  // three load/store micro-ops
168def N1Write_6c_3L       : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL]>;
169let Latency = 4, NumMicroOps = 3 in  // three FP/ASIMD micro-ops; 6c/8c below differ only in latency
170def N1Write_4c_3V       : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV]>;
171let Latency = 6, NumMicroOps = 3 in
172def N1Write_6c_3V       : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV]>;
173let Latency = 8, NumMicroOps = 3 in
174def N1Write_8c_3V       : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV]>;
175
176//===----------------------------------------------------------------------===//
177// Define generic 4 micro-op types (NumMicroOps = 4, one listed unit per micro-op)
178
179let Latency = 2, NumMicroOps = 4 in
180def N1Write_2c_2I_2L : SchedWriteRes<[N1UnitI, N1UnitI, N1UnitL, N1UnitL]>;
181let Latency = 6, NumMicroOps = 4 in
182def N1Write_6c_4L    : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL]>;
183let Latency = 2, NumMicroOps = 4 in
184def N1Write_2c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]>;
185let Latency = 3, NumMicroOps = 4 in  // 3-cycle write: two L plus two V micro-ops.
186def N1Write_3c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]>;
187let Latency = 5, NumMicroOps = 4 in  // two load/store plus two FP/ASIMD micro-ops
188def N1Write_5c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]>;
189let Latency = 7, NumMicroOps = 4 in
190def N1Write_7c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]>;
191let Latency = 4, NumMicroOps = 4 in  // four micro-ops on either FP/ASIMD pipe
192def N1Write_4c_4V    : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
193let Latency = 6, NumMicroOps = 4 in  // four micro-ops, all restricted to pipe V0
194def N1Write_6c_4V0   : SchedWriteRes<[N1UnitV0, N1UnitV0, N1UnitV0, N1UnitV0]>;
195
196//===----------------------------------------------------------------------===//
197// Define generic 5 micro-op types (NumMicroOps = 5, one listed unit per micro-op)
198
199let Latency = 3, NumMicroOps = 5 in
200def N1Write_3c_2L_3V : SchedWriteRes<[N1UnitL, N1UnitL,
201                                      N1UnitV, N1UnitV, N1UnitV]>;
202let Latency = 7, NumMicroOps = 5 in
203def N1Write_7c_2L_3V : SchedWriteRes<[N1UnitL, N1UnitL,
204                                      N1UnitV, N1UnitV, N1UnitV]>;
205let Latency = 6, NumMicroOps = 5 in
206def N1Write_6c_5V    : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
207
208//===----------------------------------------------------------------------===//
209// Define generic 6 micro-op types (NumMicroOps = 6, one listed unit per micro-op)
210
211let Latency = 3, NumMicroOps = 6 in
212def N1Write_3c_4L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
213                                      N1UnitV, N1UnitV]>;
214let Latency = 4, NumMicroOps = 6 in  // 3L + 3V; the variants below differ only in latency
215def N1Write_4c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
216                                      N1UnitV, N1UnitV, N1UnitV]>;
217let Latency = 5, NumMicroOps = 6 in
218def N1Write_5c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
219                                      N1UnitV, N1UnitV, N1UnitV]>;
220let Latency = 6, NumMicroOps = 6 in
221def N1Write_6c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
222                                      N1UnitV, N1UnitV, N1UnitV]>;
223let Latency = 7, NumMicroOps = 6 in
224def N1Write_7c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
225                                      N1UnitV, N1UnitV, N1UnitV]>;
226let Latency = 8, NumMicroOps = 6 in
227def N1Write_8c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
228                                      N1UnitV, N1UnitV, N1UnitV]>;
229
230//===----------------------------------------------------------------------===//
231// Define generic 7 micro-op types (NumMicroOps = 7, one listed unit per micro-op)
232
233let Latency = 8, NumMicroOps = 7 in
234def N1Write_8c_3L_4V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
235                                      N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
236
237//===----------------------------------------------------------------------===//
238// Define generic 8 micro-op types (4L + 4V each; they differ only in latency)
239
240let Latency = 5, NumMicroOps = 8 in
241def N1Write_5c_4L_4V  : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
242                                       N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
243let Latency = 6, NumMicroOps = 8 in
244def N1Write_6c_4L_4V  : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
245                                       N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
246let Latency = 8, NumMicroOps = 8 in
247def N1Write_8c_4L_4V  : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
248                                       N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
249let Latency = 10, NumMicroOps = 8 in
250def N1Write_10c_4L_4V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
251                                       N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
252
253//===----------------------------------------------------------------------===//
254// Define generic 12 micro-op types (NumMicroOps = 12: six L plus six V micro-ops)
255
256let Latency = 9, NumMicroOps = 12 in
257def N1Write_9c_6L_6V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
258                                      N1UnitL, N1UnitL, N1UnitL,
259                                      N1UnitV, N1UnitV, N1UnitV,
260                                      N1UnitV, N1UnitV, N1UnitV]>;
261
262
263// Miscellaneous Instructions
264// -----------------------------------------------------------------------------
265
266def : InstRW<[WriteI], (instrs COPY)>;
267
268// Convert floating-point condition flags
269// Flag manipulation instructions
270def : WriteRes<WriteSys, []> { let Latency = 1; }
271
272
273// Branch Instructions
274// -----------------------------------------------------------------------------
275
276// Branch, immed
277// Compare and branch
278def : SchedAlias<WriteBr, N1Write_1c_1B>;
279
280// Branch, register
281def : SchedAlias<WriteBrReg, N1Write_1c_1B>;
282
283// Branch and link, immed
284// Branch and link, register
285def : InstRW<[N1Write_1c_1B_1I], (instrs BL, BLR)>;
286
287// Compare and branch
288def : InstRW<[N1Write_1c_1B], (instregex "^[CT]BN?Z[XW]$")>;
289
290
291// Arithmetic and Logical Instructions
292// -----------------------------------------------------------------------------
293
294// ALU, basic
295// ALU, basic, flagset
296// Conditional compare
297// Conditional select
298// Logical, basic
299// Address generation
300// Count leading
301// Reverse bits/bytes
302// Move immediate
303def : SchedAlias<WriteI, N1Write_1c_1I>;
304
305// ALU, extend and shift
306def : SchedAlias<WriteIEReg, N1Write_2c_1M>;
307
308// Arithmetic, LSL shift, shift <= 4
309// Arithmetic, flagset, LSL shift, shift <= 4
310// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
311def N1WriteISReg : SchedWriteVariant<[  // Picks the write type per instruction form.
312                     SchedVar<IsCheapLSL,  [N1Write_1c_1I]>,    // cheap LSL: 1 cycle, any integer pipe
313                     SchedVar<NoSchedPred, [N1Write_2c_1M]>]>;  // otherwise: 2 cycles, M pipe
314def              : SchedAlias<WriteISReg, N1WriteISReg>;  // WriteISReg resolves to the variant.
315
316// Logical, shift, no flagset
317def : InstRW<[N1Write_1c_1I],
318             (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
319
320// Logical, shift, flagset
321def : InstRW<[N1Write_2c_1M], (instregex "^(AND|BIC)S[WX]rs$")>;
322
323
324// Divide and multiply instructions
325// -----------------------------------------------------------------------------
326
327// Divide
328def : SchedAlias<WriteID32, N1Write_12c5_1M>;
329def : SchedAlias<WriteID64, N1Write_20c5_1M>;
330
331// Multiply accumulate
332// Multiply accumulate, long
333def : SchedAlias<WriteIM32, N1Write_2c_1M>;
334def : SchedAlias<WriteIM64, N1Write_4c3_1M>;
335
336// Multiply high
337def : InstRW<[N1Write_5c3_1M, ReadIM, ReadIM], (instrs SMULHrr, UMULHrr)>;
338
339
340// Miscellaneous data-processing instructions
341// -----------------------------------------------------------------------------
342
343// Bitfield extract, one reg
344// Bitfield extract, two regs
345def N1WriteExtr : SchedWriteVariant<[  // Picks the write type per instruction form.
346                    SchedVar<IsRORImmIdiomPred, [N1Write_1c_1I]>,      // ROR-immediate idiom: 1 cycle, any integer pipe
347                    SchedVar<NoSchedPred,       [N1Write_3c_1I_1M]>]>; // otherwise: 3 cycles, one I plus one M micro-op
348def : SchedAlias<WriteExtr, N1WriteExtr>;  // WriteExtr resolves to the variant.
349
350// Bitfield move, basic
351// Variable shift
352def : SchedAlias<WriteIS, N1Write_1c_1I>;
353
354// Bitfield move, insert
355def : InstRW<[N1Write_2c_1M], (instregex "^BFM[WX]ri$")>;
356
357// Move immediate
358def : SchedAlias<WriteImm, N1Write_1c_1I>;
359
360// Load instructions
361// -----------------------------------------------------------------------------
362
363// Load register, immed offset
364def : SchedAlias<WriteLD, N1Write_4c_1L>;
365
366// Load register, immed offset, index
367def : SchedAlias<WriteLDIdx, N1Write_4c_1L>;
368def : SchedAlias<WriteAdr,   N1Write_1c_1I>;
369
370// Load pair, immed offset
371def : SchedAlias<WriteLDHi, N1Write_4c_1L>;
372
373// Load pair, immed offset, W-form
374def : InstRW<[N1Write_4c_1L, N1Write_0c_0Z], (instrs LDPWi, LDNPWi)>;
375
376// Load pair, signed immed offset, signed words
377def : InstRW<[N1Write_5c_1I_1L, N1Write_0c_0Z], (instrs LDPSWi)>;
378
379// Load pair, immed post or pre-index, signed words
380def : InstRW<[N1Write_5c_1I_1L, N1Write_0c_0Z, WriteAdr],
381             (instrs LDPSWpost, LDPSWpre)>;
382
383
384// Store instructions
385// -----------------------------------------------------------------------------
386
387// Store register, immed offset
388def : SchedAlias<WriteST, N1Write_1c_1L_1D>;
389
390// Store register, immed offset, index
391def : SchedAlias<WriteSTIdx, N1Write_1c_1L_1D>;
392
393// Store pair, immed offset
394def : SchedAlias<WriteSTP, N1Write_1c_2L_1D>;
395
396// Store pair, immed offset, W-form
397def : InstRW<[N1Write_1c_1L_1D], (instrs STPWi)>;
398
399
400// FP data processing instructions
401// -----------------------------------------------------------------------------
402
403// FP absolute value
404// FP arithmetic
405// FP min/max
406// FP negate
407// FP select
408def : SchedAlias<WriteF, N1Write_2c_1V>;
409
410// FP compare
411def : SchedAlias<WriteFCmp, N1Write_2c_1V0>;
412
413// FP divide
414// FP square root
415def : SchedAlias<WriteFDiv, N1Write_10c7_1V0>;
416
417// FP divide, H-form
418// FP square root, H-form
419def : InstRW<[N1Write_7c7_1V0], (instrs FDIVHrr, FSQRTHr)>;
420
421// FP divide, S-form
422// FP square root, S-form
423def : InstRW<[N1Write_10c7_1V0], (instrs FDIVSrr, FSQRTSr)>;
424
425// FP divide, D-form
426def : InstRW<[N1Write_15c7_1V0], (instrs FDIVDrr)>;
427
428// FP square root, D-form
429def : InstRW<[N1Write_17c7_1V0], (instrs FSQRTDr)>;
430
431// FP multiply
432def : SchedAlias<WriteFMul, N1Write_3c_1V>;
433
434// FP multiply accumulate
435def : InstRW<[N1Write_4c_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
436
437// FP round to integral
438def : InstRW<[N1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ][HSD]r$",
439                                          "^FRINT(32|64)[XZ][SD]r$")>;
440
441
442// FP miscellaneous instructions
443// -----------------------------------------------------------------------------
444
445// FP convert, from vec to vec reg
446// FP convert, Javascript from vec to gen reg
447def : SchedAlias<WriteFCvt, N1Write_3c_1V>;
448
449// FP convert, from gen to vec reg
450def : InstRW<[N1Write_6c_1M_1V0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
451
452// FP convert, from vec to gen reg
453def : InstRW<[N1Write_4c_1V0_1V1], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;
454
455// FP move, immed
456def : SchedAlias<WriteFImm, N1Write_2c_1V>;
457
458// FP move, register
459def : InstRW<[N1Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
460
461// FP transfer, from gen to low half of vec reg
462// FP transfer, from gen to high half of vec reg
463def : InstRW<[N1Write_3c_1M], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
464                                      FMOVXDHighr)>;
465
466// FP transfer, from vec to gen reg
467def : SchedAlias<WriteFCopy, N1Write_2c_1V1>;
468
469
470// FP load instructions
471// -----------------------------------------------------------------------------
472
473// Load vector reg, literal, S/D/Q forms
474// Load vector reg, unscaled immed
475def : InstRW<[N1Write_5c_1L, ReadAdrBase], (instregex "^LDR[SDQ]l$",
476                                                      "^LDUR[BHSDQ]i$")>;
477
478// Load vector reg, immed post-index
479// Load vector reg, immed pre-index
480def : InstRW<[N1Write_5c_1L, WriteAdr],
481             (instregex "^LDR[BHSDQ](post|pre)$")>;
482
483// Load vector reg, unsigned immed
484def : InstRW<[N1Write_5c_1I_1L], (instregex "^LDR[BHSDQ]ui$")>;
485
486// Load vector reg, register offset, basic
487// Load vector reg, register offset, scale, S/D-form
488// Load vector reg, register offset, extend
489// Load vector reg, register offset, extend, scale, S/D-form
490def : InstRW<[N1Write_5c_1I_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;
491
492// Load vector reg, register offset, scale, H/Q-form
493// Load vector reg, register offset, extend, scale, H/Q-form
494def : InstRW<[N1Write_6c_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;
495
496// Load vector pair, immed offset, S/D-form
497def : InstRW<[N1Write_5c_1I_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;
498
499// Load vector pair, immed offset (LDP and non-temporal LDNP), H/Q-form
500def : InstRW<[N1Write_7c_1I_1L, WriteLDHi], (instregex "^LDN?P[HQ]i$")>;
501
502// Load vector pair, immed post-index, S/D-form
503// Load vector pair, immed pre-index, S/D-form
504def : InstRW<[N1Write_5c_1L, WriteLDHi, WriteAdr],
505             (instregex "^LDP[SD](pre|post)$")>;
506
507// Load vector pair, immed post-index, Q-form
508// Load vector pair, immed pre-index, Q-form
509def : InstRW<[N1Write_7c_1L, WriteLDHi, WriteAdr],
510             (instrs LDPQpost, LDPQpre)>;
511
512
513// FP store instructions
514// -----------------------------------------------------------------------------
515
516// Store vector reg, unscaled immed, B/H/S/D-form
517def : InstRW<[N1Write_2c_1I_1L], (instregex "^STUR[BHSD]i$")>;
518
519// Store vector reg, unscaled immed, Q-form
520def : InstRW<[N1Write_2c_2I_2L], (instrs STURQi)>;
521
522// Store vector reg, immed post-index, B/H/S/D-form
523// Store vector reg, immed pre-index, B/H/S/D-form
524def : InstRW<[N1Write_2c_1L_1V, WriteAdr], (instregex "^STR[BHSD](pre|post)$")>;
525
526// Store vector reg, immed pre-index, Q-form
527// Store vector reg, immed post-index, Q-form
528def : InstRW<[N1Write_2c_2L_2V, WriteAdr], (instrs STRQpre, STRQpost)>;
529
530// Store vector reg, unsigned immed, B/H/S/D-form
531def : InstRW<[N1Write_2c_1L_1V], (instregex "^STR[BHSD]ui$")>;
532
533// Store vector reg, unsigned immed, Q-form
534def : InstRW<[N1Write_2c_2L_2V], (instrs STRQui)>;
535
536// Store vector reg, register offset, basic, B/S/D-form
537// Store vector reg, register offset, scale, B/S/D-form
538// Store vector reg, register offset, extend, B/S/D-form
539// Store vector reg, register offset, extend, scale, B/S/D-form
540def : InstRW<[N1Write_2c_1L_1V, ReadAdrBase], (instregex "^STR[BSD]ro[WX]$")>;
541
542// Store vector reg, register offset, basic, H-form
543// Store vector reg, register offset, scale, H-form
544// Store vector reg, register offset, extend, H-form
545// Store vector reg, register offset, extend, scale, H-form
546def : InstRW<[N1Write_2c_1I_1L_1V, ReadAdrBase], (instregex "^STRHro[WX]$")>;
547
548// Store vector reg, register offset, basic, Q-form
549// Store vector reg, register offset, scale, Q-form
550// Store vector reg, register offset, extend, Q-form
551// Store vector reg, register offset, extend, scale, Q-form
552def : InstRW<[N1Write_2c_2L_2V, ReadAdrBase], (instregex "^STRQro[WX]$")>;
553
554// Store vector pair, immed offset, S-form
555def : InstRW<[N1Write_2c_1L_1V], (instrs STPSi, STNPSi)>;
556
557// Store vector pair, immed offset, D-form
558def : InstRW<[N1Write_2c_2L_2V], (instrs STPDi, STNPDi)>;
559
560// Store vector pair, immed offset, Q-form
561def : InstRW<[N1Write_3c_4L_2V], (instrs STPQi, STNPQi)>;
562
563// Store vector pair, immed post-index, S-form
564// Store vector pair, immed pre-index, S-form
565def : InstRW<[N1Write_2c_1L_1V, WriteAdr], (instrs STPSpre, STPSpost)>;
566
567// Store vector pair, immed post-index, D-form
568// Store vector pair, immed pre-index, D-form
569def : InstRW<[N1Write_2c_2L_2V, WriteAdr], (instrs STPDpre, STPDpost)>;
570
571// Store vector pair, immed post-index, Q-form
572// Store vector pair, immed pre-index, Q-form
573def : InstRW<[N1Write_3c_4L_2V, WriteAdr], (instrs STPQpre, STPQpost)>;
574
575
576// ASIMD integer instructions
577// -----------------------------------------------------------------------------
578
579// ASIMD absolute diff
580// ASIMD absolute diff long
581// ASIMD arith, basic
582// ASIMD arith, complex
583// ASIMD arith, pair-wise
584// ASIMD compare
585// ASIMD logical
586// ASIMD max/min, basic and pair-wise
587def : SchedAlias<WriteVd, N1Write_2c_1V>;
588def : SchedAlias<WriteVq, N1Write_2c_1V>;
589
590// ASIMD absolute diff accum
591// ASIMD absolute diff accum long
592def : InstRW<[N1Write_4c_1V1], (instregex "^[SU]ABAL?v")>;
593
594// ASIMD arith, reduce, 4H/4S
595def : InstRW<[N1Write_3c_1V1], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
596
597// ASIMD arith, reduce, 8B/8H
598def : InstRW<[N1Write_5c_1V1_1V], (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
599
600// ASIMD arith, reduce, 16B
601def : InstRW<[N1Write_6c_2V1], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;
602
603// ASIMD max/min, reduce, 4H/4S
604def : InstRW<[N1Write_3c_1V1], (instregex "^[SU](MAX|MIN)Vv4(i16|i32)v$")>;
605
606// ASIMD max/min, reduce, 8B/8H
607def : InstRW<[N1Write_5c_1V1_1V], (instregex "^[SU](MAX|MIN)Vv8(i8|i16)v$")>;
608
609// ASIMD max/min, reduce, 16B
610def : InstRW<[N1Write_6c_2V1], (instregex "^[SU](MAX|MIN)Vv16i8v$")>;
611
612// ASIMD multiply, D-form
613// ASIMD multiply accumulate, D-form
614// ASIMD multiply accumulate high, D-form
615// ASIMD multiply accumulate saturating long
616// ASIMD multiply long
617// ASIMD multiply accumulate long
618def : InstRW<[N1Write_4c_1V0], (instregex "^MUL(v[14]i16|v[12]i32)$",
619                                          "^ML[AS](v[14]i16|v[12]i32)$",
620                                          "^SQ(R)?DMULH(v[14]i16|v[12]i32)$",
621                                          "^SQRDML[AS]H(v[14]i16|v[12]i32)$",
622                                          "^SQDML[AS]Lv",
623                                          "^([SU]|SQD)MULLv",
624                                          "^[SU]ML[AS]Lv")>;
625
626// ASIMD multiply, Q-form
627// ASIMD multiply accumulate, Q-form
628// ASIMD multiply accumulate high, Q-form
629def : InstRW<[N1Write_5c_2V0], (instregex "^MUL(v8i16|v4i32)$",
630                                          "^ML[AS](v8i16|v4i32)$",
631                                          "^SQ(R)?DMULH(v8i16|v4i32)$",
632                                          "^SQRDML[AS]H(v8i16|v4i32)$")>;
633
634// ASIMD multiply/multiply long (8x8) polynomial, D-form
635def : InstRW<[N1Write_3c_1V0], (instrs PMULv8i8, PMULLv8i8)>;
636
637// ASIMD multiply/multiply long (8x8) polynomial, Q-form
638def : InstRW<[N1Write_4c_2V0], (instrs PMULv16i8, PMULLv16i8)>;
639
640// ASIMD pairwise add and accumulate long
641def : InstRW<[N1Write_4c_1V1], (instregex "^[SU]ADALPv")>;
642
643// ASIMD shift accumulate
644def : InstRW<[N1Write_4c_1V1], (instregex "^[SU]R?SRAv")>;
645
646// ASIMD shift by immed, basic
647// ASIMD shift by immed and insert, basic
648// ASIMD shift by register, basic
649def : InstRW<[N1Write_2c_1V1], (instregex "^SHLL?v", "^SHRNv", "^[SU]SHLLv",
650                                          "^[SU]SHRv", "^S[LR]Iv", "^[SU]SHLv")>;
651
652// ASIMD shift by immed, complex
653// ASIMD shift by register, complex
654def : InstRW<[N1Write_4c_1V1],
655             (instregex "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$",
656                        "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
657                        "^SQSHU?RNv", "^[SU]RSHRv", "^UQR?SHRNv",
658                        "^[SU]Q?RSHLv", "^[SU]QSHLv")>;
659
660
661// ASIMD FP instructions
662// -----------------------------------------------------------------------------
663
664// ASIMD FP absolute value/difference
665// ASIMD FP arith, normal
666// ASIMD FP compare
667// ASIMD FP max/min, normal
668// ASIMD FP max/min, pairwise
669// ASIMD FP negate
670// Covered by "SchedAlias (WriteV[dq]...)" above
671
672// ASIMD FP convert, long (F16 to F32)
673def : InstRW<[N1Write_4c_2V0], (instregex "^FCVTL(v4|v8)i16$")>;
674
675// ASIMD FP convert, long (F32 to F64)
676def : InstRW<[N1Write_3c_1V0], (instregex "^FCVTL(v2|v4)i32$")>;
677
678// ASIMD FP convert, narrow (F32 to F16)
679def : InstRW<[N1Write_4c_2V0], (instregex "^FCVTN(v4|v8)i16$")>;
680
681// ASIMD FP convert, narrow (F64 to F32)
682def : InstRW<[N1Write_3c_1V0], (instregex "^FCVTN(v2|v4)i32$",
683                                          "^FCVTXN(v2|v4)f32$")>;
684
685// ASIMD FP convert, other, D-form F32 and Q-form F64
686def : InstRW<[N1Write_3c_1V0], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
687                                          "^[SU]CVTFv2f(32|64)$")>;
688
689// ASIMD FP convert, other, D-form F16 and Q-form F32
690def : InstRW<[N1Write_4c_2V0], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
691                                          "^[SU]CVTFv4f(16|32)$")>;
692
693// ASIMD FP convert, other, Q-form F16
694def : InstRW<[N1Write_6c_4V0], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
695                                          "^[SU]CVTFv8f16$")>;
696
697// ASIMD FP divide, D-form, F16
698// ASIMD FP square root, D-form, F16
699def : InstRW<[N1Write_7c7_1V0], (instrs FDIVv4f16, FSQRTv4f16)>;
700
701// ASIMD FP divide, D-form, F32
702// ASIMD FP square root, D-form, F32
703def : InstRW<[N1Write_10c7_1V0], (instrs FDIVv2f32, FSQRTv2f32)>;
704
705// ASIMD FP divide, Q-form, F16
706// ASIMD FP square root, Q-form, F16
707def : InstRW<[N1Write_13c10_1V0], (instrs FDIVv8f16, FSQRTv8f16)>;
708
709// ASIMD FP divide, Q-form, F32
710// ASIMD FP square root, Q-form, F32
711def : InstRW<[N1Write_10c7_1V0], (instrs FDIVv4f32, FSQRTv4f32)>;
712
713// ASIMD FP divide, Q-form, F64
714def : InstRW<[N1Write_15c7_1V0], (instrs FDIVv2f64)>;
715
716// ASIMD FP square root, Q-form, F64
717def : InstRW<[N1Write_17c7_1V0], (instrs FSQRTv2f64)>;
718
719// ASIMD FP max/min, reduce, F32 and D-form F16
720def : InstRW<[N1Write_5c_1V], (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>;
721
722// ASIMD FP max/min, reduce, Q-form F16
723def : InstRW<[N1Write_8c_3V], (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>;
724
725// ASIMD FP multiply
726def : InstRW<[N1Write_3c_1V], (instregex "^FMULX?v")>;
727
728// ASIMD FP multiply accumulate
729def : InstRW<[N1Write_4c_1V], (instregex "^FML[AS]v")>;
730
731// ASIMD FP multiply accumulate long
732def : InstRW<[N1Write_5c_1V], (instregex "^FML[AS]L2?v")>;
733
734// ASIMD FP round, D-form F32 and Q-form F64
735def : InstRW<[N1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]v2f(32|64)$")>;
736
737// ASIMD FP round, D-form F16 and Q-form F32
738def : InstRW<[N1Write_4c_2V0], (instregex "^FRINT[AIMNPXZ]v4f(16|32)$")>;
739
740// ASIMD FP round, Q-form F16
741def : InstRW<[N1Write_6c_4V0], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
742
743
744// ASIMD miscellaneous instructions
745// -----------------------------------------------------------------------------
746
747// ASIMD bit reverse
748// ASIMD bitwise insert
749// ASIMD count
750// ASIMD duplicate, element
751// ASIMD extract
752// ASIMD extract narrow
753// ASIMD insert, element to element
754// ASIMD move, FP immed
755// ASIMD move, integer immed
756// ASIMD reverse
757// ASIMD table lookup, 1 or 2 table regs
758// ASIMD table lookup extension, 1 table reg
// ASIMD transfer, element to gen reg (SMOV/UMOV also get an explicit InstRW
// further below)
760// ASIMD transpose
761// ASIMD unzip/zip
762// Covered by "SchedAlias (WriteV[dq]...)" above
763
// ASIMD duplicate, gen reg
// The alternation spells out every lane-count/element-size combination of the
// DUP records whose names end in "gpr".
def : InstRW<[N1Write_3c_1M],
             (instregex "^DUP((v16|v8)i8|(v8|v4)i16|(v4|v2)i32|v2i64)gpr$")>;

// ASIMD extract narrow, saturating
// Prefix patterns: any record whose name starts with SQXTNv, UQXTNv or
// SQXTUNv.
def : InstRW<[N1Write_4c_1V1], (instregex "^[SU]QXTNv", "^SQXTUNv")>;

// The three estimate entries below are explicit record lists, split by
// arrangement. Besides FRECPE/FRSQRTE they also place the FRECPX and the
// URECPE/URSQRTE records.

// ASIMD reciprocal and square root estimate, D-form F32 and F64
def : InstRW<[N1Write_3c_1V0], (instrs FRECPEv1i32, FRECPEv2f32, FRECPEv1i64,
                                       FRECPXv1i32, FRECPXv1i64,
                                       URECPEv2i32,
                                       FRSQRTEv1i32, FRSQRTEv2f32, FRSQRTEv1i64,
                                       URSQRTEv2i32)>;

// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
def : InstRW<[N1Write_4c_2V0], (instrs FRECPEv1f16, FRECPEv4f16, FRECPEv4f32,
                                       FRECPXv1f16,
                                       URECPEv4i32,
                                       FRSQRTEv1f16, FRSQRTEv4f16, FRSQRTEv4f32,
                                       URSQRTEv4i32)>;

// ASIMD reciprocal and square root estimate, Q-form F16
def : InstRW<[N1Write_6c_4V0], (instrs FRECPEv8f16,
                                       FRSQRTEv8f16)>;

// ASIMD reciprocal step
// Two patterns per mnemonic: an exact name with a 16/32/64 suffix (presumably
// the scalar records -- confirm) and a "v" prefix match for the vector ones.
def : InstRW<[N1Write_4c_1V], (instregex "^FRECPS(16|32|64)$", "^FRECPSv",
                                         "^FRSQRTS(16|32|64)$", "^FRSQRTSv")>;
792
// The TBL/TBX entries below are explicit record lists; each one covers the
// v8i8 and the v16i8 variant for a given number of table registers.

// ASIMD table lookup, 3 table regs
// ASIMD table lookup extension, 2 table reg
def : InstRW<[N1Write_4c_4V], (instrs TBLv8i8Three, TBLv16i8Three,
                                      TBXv8i8Two, TBXv16i8Two)>;

// ASIMD table lookup, 4 table regs
def : InstRW<[N1Write_4c_3V], (instrs TBLv8i8Four, TBLv16i8Four)>;

// ASIMD table lookup extension, 3 table reg
def : InstRW<[N1Write_6c_3V], (instrs TBXv8i8Three, TBXv16i8Three)>;

// ASIMD table lookup extension, 4 table reg
def : InstRW<[N1Write_6c_5V], (instrs TBXv8i8Four, TBXv16i8Four)>;

// ASIMD transfer, element to gen reg
// SMOV: the pattern enumerates the source-to-destination widths (8 or 16 to
// 32 or 64, plus 32to64). UMOV: one record per element width. Both patterns
// end in "$", so only these exact names are matched.
def : InstRW<[N1Write_2c_1V1], (instregex "^SMOVvi(((8|16)to(32|64))|32to64)$",
                                          "^UMOVvi(8|16|32|64)$")>;

// ASIMD transfer, gen reg to element
// Matches the INS records whose names end in "gpr", one per element width.
def : InstRW<[N1Write_5c_1M_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
813
814
815// ASIMD load instructions
816// -----------------------------------------------------------------------------
817
// LD1 multiple-structure loads, one pair of entries per register count: the
// base form, then the post-indexed (_POST) form, which lists WriteAdr as an
// additional write.
// NOTE(review): InstRW writes are assigned to an instruction's defs in order;
// confirm that WriteAdr lines up with the writeback operand of the _POST
// records rather than with the vector result.

// ASIMD load, 1 element, multiple, 1 reg
def : InstRW<[N1Write_5c_1L],
             (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[N1Write_5c_1L, WriteAdr],
             (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg
def : InstRW<[N1Write_5c_2L],
             (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[N1Write_5c_2L, WriteAdr],
             (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg
def : InstRW<[N1Write_6c_3L],
             (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[N1Write_6c_3L, WriteAdr],
             (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg
def : InstRW<[N1Write_6c_4L],
             (instregex "^LD1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[N1Write_6c_4L, WriteAdr],
             (instregex "^LD1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
841
// ASIMD load, 1 element, one lane
// ASIMD load, 1 element, all lanes
// The single-lane records are named LD1i<size>; the replicate (all lanes)
// records are named by vector arrangement (LD1Rv8b, LD1Rv16b, ...), so each
// kind has its own pattern.  The second entry is the post-indexed (_POST)
// form, which additionally lists WriteAdr.
def : InstRW<[N1Write_7c_1L_1V],
             (instregex "LD1i(8|16|32|64)$",
                        "LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[N1Write_7c_1L_1V, WriteAdr],
             (instregex "LD1i(8|16|32|64)_POST$",
                        "LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
850
// LD2/LD3/LD4 entries.  Each class has a base entry and a post-indexed
// (_POST) entry that lists WriteAdr as an additional write.  The
// multiple-structure patterns omit the 1d arrangement, while the replicate
// (LDnRv) patterns include it.

// ASIMD load, 2 element, multiple
// ASIMD load, 2 element, one lane
// ASIMD load, 2 element, all lanes
def : InstRW<[N1Write_7c_2L_2V],
             (instregex "LD2Twov(8b|16b|4h|8h|2s|4s|2d)$",
                        "LD2i(8|16|32|64)$",
                        "LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[N1Write_7c_2L_2V, WriteAdr],
             (instregex "LD2Twov(8b|16b|4h|8h|2s|4s|2d)_POST$",
                        "LD2i(8|16|32|64)_POST$",
                        "LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// ASIMD load, 3 element, multiple
def : InstRW<[N1Write_8c_3L_3V],
             (instregex "LD3Threev(8b|16b|4h|8h|2s|4s|2d)$")>;
def : InstRW<[N1Write_8c_3L_3V, WriteAdr],
             (instregex "LD3Threev(8b|16b|4h|8h|2s|4s|2d)_POST$")>;

// ASIMD load, 3 element, one lane
// ASIMD load, 3 element, all lanes
def : InstRW<[N1Write_7c_2L_3V],
             (instregex "LD3i(8|16|32|64)$",
                        "LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[N1Write_7c_2L_3V, WriteAdr],
             (instregex "LD3i(8|16|32|64)_POST$",
                        "LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// LD4 multiple is the only load class here that is split into D-form and
// Q-form entries with different writes.

// ASIMD load, 4 element, multiple, D-form
def : InstRW<[N1Write_8c_3L_4V],
             (instregex "LD4Fourv(8b|4h|2s)$")>;
def : InstRW<[N1Write_8c_3L_4V, WriteAdr],
             (instregex "LD4Fourv(8b|4h|2s)_POST$")>;

// ASIMD load, 4 element, multiple, Q-form
def : InstRW<[N1Write_10c_4L_4V],
             (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[N1Write_10c_4L_4V, WriteAdr],
             (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, one lane
// ASIMD load, 4 element, all lanes
def : InstRW<[N1Write_8c_4L_4V],
             (instregex "LD4i(8|16|32|64)$",
                        "LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[N1Write_8c_4L_4V, WriteAdr],
             (instregex "LD4i(8|16|32|64)_POST$",
                        "LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
898
899
900// ASIMD store instructions
901// -----------------------------------------------------------------------------
902
// ST1 entries, split by register count and by D-form / Q-form arrangement.
// Every class has a base entry followed by a post-indexed (_POST) entry that
// lists WriteAdr as an additional write.

// ASIMD store, 1 element, multiple, 1 reg, D-form
def : InstRW<[N1Write_2c_1L_1V],
             (instregex "ST1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[N1Write_2c_1L_1V, WriteAdr],
             (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 1 reg, Q-form
// (Same write as the 1 reg, D-form entry above.)
def : InstRW<[N1Write_2c_1L_1V],
             (instregex "ST1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[N1Write_2c_1L_1V, WriteAdr],
             (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, D-form
def : InstRW<[N1Write_2c_1L_2V],
             (instregex "ST1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[N1Write_2c_1L_2V, WriteAdr],
             (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, Q-form
def : InstRW<[N1Write_3c_2L_2V],
             (instregex "ST1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[N1Write_3c_2L_2V, WriteAdr],
             (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, D-form
def : InstRW<[N1Write_3c_2L_3V],
             (instregex "ST1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[N1Write_3c_2L_3V, WriteAdr],
             (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, Q-form
def : InstRW<[N1Write_4c_3L_3V],
             (instregex "ST1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[N1Write_4c_3L_3V, WriteAdr],
             (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, D-form
// (Same write as the 2 reg, Q-form entry above.)
def : InstRW<[N1Write_3c_2L_2V],
             (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[N1Write_3c_2L_2V, WriteAdr],
             (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, Q-form
def : InstRW<[N1Write_5c_4L_4V],
             (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[N1Write_5c_4L_4V, WriteAdr],
             (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, one lane
def : InstRW<[N1Write_4c_1L_1V],
             (instregex "ST1i(8|16|32|64)$")>;
def : InstRW<[N1Write_4c_1L_1V, WriteAdr],
             (instregex "ST1i(8|16|32|64)_POST$")>;
956
// ST2/ST3/ST4 entries.  Every class has a base entry followed by a
// post-indexed (_POST) entry that lists WriteAdr as an additional write.
// Where the 64-bit element case uses a different write it gets its own entry,
// selected with `instrs` instead of a pattern.

// ASIMD store, 2 element, multiple, D-form, B/H/S
def : InstRW<[N1Write_4c_1L_1V],
             (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[N1Write_4c_1L_1V, WriteAdr],
             (instregex "ST2Twov(8b|4h|2s)_POST$")>;

// ASIMD store, 2 element, multiple, Q-form
def : InstRW<[N1Write_5c_2L_2V],
             (instregex "ST2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[N1Write_5c_2L_2V, WriteAdr],
             (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 2 element, one lane
def : InstRW<[N1Write_4c_1L_1V],
             (instregex "ST2i(8|16|32|64)$")>;
def : InstRW<[N1Write_4c_1L_1V, WriteAdr],
             (instregex "ST2i(8|16|32|64)_POST$")>;

// ASIMD store, 3 element, multiple, D-form, B/H/S
def : InstRW<[N1Write_5c_2L_2V],
             (instregex "ST3Threev(8b|4h|2s)$")>;
def : InstRW<[N1Write_5c_2L_2V, WriteAdr],
             (instregex "ST3Threev(8b|4h|2s)_POST$")>;

// ASIMD store, 3 element, multiple, Q-form
def : InstRW<[N1Write_6c_3L_3V],
             (instregex "ST3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[N1Write_6c_3L_3V, WriteAdr],
             (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 3 element, one lane, B/H/S
def : InstRW<[N1Write_4c_3L_3V],
             (instregex "ST3i(8|16|32)$")>;
def : InstRW<[N1Write_4c_3L_3V, WriteAdr],
             (instregex "ST3i(8|16|32)_POST$")>;

// ASIMD store, 3 element, one lane, D
def : InstRW<[N1Write_5c_3L_3V],
             (instrs ST3i64)>;
def : InstRW<[N1Write_5c_3L_3V, WriteAdr],
             (instrs ST3i64_POST)>;

// ASIMD store, 4 element, multiple, D-form, B/H/S
def : InstRW<[N1Write_7c_3L_3V],
             (instregex "ST4Fourv(8b|4h|2s)$")>;
def : InstRW<[N1Write_7c_3L_3V, WriteAdr],
             (instregex "ST4Fourv(8b|4h|2s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, B/H/S
def : InstRW<[N1Write_9c_6L_6V],
             (instregex "ST4Fourv(16b|8h|4s)$")>;
def : InstRW<[N1Write_9c_6L_6V, WriteAdr],
             (instregex "ST4Fourv(16b|8h|4s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, D
def : InstRW<[N1Write_6c_4L_4V],
             (instrs ST4Fourv2d)>;
def : InstRW<[N1Write_6c_4L_4V, WriteAdr],
             (instrs ST4Fourv2d_POST)>;

// NOTE(review): for ST3 one lane the D entry uses a "5c" write and B/H/S a
// "4c" write, while for ST4 one lane below it is the other way round (B/H/S
// "5c", D "4c").  Confirm both against the optimization guide.

// ASIMD store, 4 element, one lane, B/H/S
def : InstRW<[N1Write_5c_3L_3V],
             (instregex "ST4i(8|16|32)$")>;
def : InstRW<[N1Write_5c_3L_3V, WriteAdr],
             (instregex "ST4i(8|16|32)_POST$")>;

// ASIMD store, 4 element, one lane, D
def : InstRW<[N1Write_4c_3L_3V],
             (instrs ST4i64)>;
def : InstRW<[N1Write_4c_3L_3V, WriteAdr],
             (instrs ST4i64_POST)>;
1028
1029
1030// Cryptography extensions
1031// -----------------------------------------------------------------------------
1032
// Crypto AES ops
// AESD/AESE are given a dedicated write (N1WriteVC, a sequence wrapping
// N1Write_2c_1V0) so that a read advance can be tied to exactly these
// producers.  N1ReadVC is a SchedReadAdvance of 2 cycles that applies only when
// the value was produced by N1WriteVC; it is attached to AESMC/AESIMC.
// NOTE(review): presumably this models the fused AESE/AESD + AESMC/AESIMC
// pair by cancelling the producer latency -- confirm against the guide.
def N1WriteVC : WriteSequence<[N1Write_2c_1V0]>;
def N1ReadVC  : SchedReadAdvance<2, [N1WriteVC]>;
def           : InstRW<[N1WriteVC], (instrs AESDrr, AESErr)>;
def           : InstRW<[N1Write_2c_1V0, N1ReadVC], (instrs AESMCrr, AESIMCrr)>;

// Crypto polynomial (64x64) multiply long
// Crypto SHA1 hash acceleration op
// Crypto SHA1 schedule acceleration ops
// Crypto SHA256 schedule acceleration ops
// The SHA patterns have no "$", so they are prefix matches (they accept both
// the ...rr and ...rrr record names).
def : InstRW<[N1Write_2c_1V0], (instregex "^PMULLv[12]i64$",
                                          "^SHA1(H|SU0|SU1)rr",
                                          "^SHA256SU[01]rr")>;

// Crypto SHA1 hash acceleration ops
// Crypto SHA256 hash acceleration ops
// Exact matches: SHA1C/SHA1M/SHA1P and SHA256H/SHA256H2, three-register form.
def : InstRW<[N1Write_4c_1V0], (instregex "^SHA1[CMP]rrr$",
                                          "^SHA256H2?rrr$")>;
1051
1052
1053// CRC
1054// -----------------------------------------------------------------------------
1055
// CRC checksum ops
// One pattern covers both CRC32 and CRC32C ("C?") for each of the B/H/W/X
// record suffixes.
def : InstRW<[N1Write_2c_1M], (instregex "^CRC32C?[BHWX]rr$")>;
1058
1059
1060}
1061