1e8d8bef9SDimitry Andric//==- AArch64SchedCortexA55.td - ARM Cortex-A55 Scheduling Definitions -*- tablegen -*-=//
2e8d8bef9SDimitry Andric//
3e8d8bef9SDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric// See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric//
7e8d8bef9SDimitry Andric//===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric//
981ad6265SDimitry Andric// This file defines the machine model for the ARM Cortex-A55 processors. Note
1081ad6265SDimitry Andric// that this schedule is currently used as the default for -mcpu=generic. As a
// result, some of the modelling decisions made do not precisely model the
1281ad6265SDimitry Andric// Cortex-A55, instead aiming to be a good compromise between different cpus.
13e8d8bef9SDimitry Andric//
14e8d8bef9SDimitry Andric//===----------------------------------------------------------------------===//
15e8d8bef9SDimitry Andric
16e8d8bef9SDimitry Andric// ===---------------------------------------------------------------------===//
17e8d8bef9SDimitry Andric// The following definitions describe the per-operand machine model.
18e8d8bef9SDimitry Andric// This works with MachineScheduler. See MCSchedModel.h for details.
19e8d8bef9SDimitry Andric
20e8d8bef9SDimitry Andric// Cortex-A55 machine model for scheduling and other instruction cost heuristics.
def CortexA55Model : SchedMachineModel {
  // The Cortex-A55 is an in-order processor.
  let MicroOpBufferSize = 0;

  // It dual-issues under most circumstances.
  let IssueWidth = 2;

  // Cycles for loads to access the cache. The optimisation guide shows that
  // most loads have a latency of 3, but some have a latency of 4 or 5.
  // Setting it to 4 looked to be a good trade-off.
  let LoadLatency = 4;

  // A branch direction mispredict.
  let MispredictPenalty = 8;

  // Enable the PostRA scheduler pass.
  let PostRAScheduler = 1;

  // Covers instructions applicable to Cortex-A55; the model is not flagged
  // as complete.
  let CompleteModel = 0;

  // FIXME: Remove when all errors have been fixed.
  let FullInstRWOverlapCheck = 0;

  list<Predicate> UnsupportedFeatures = [HasSVE, HasMTE];
}
37e8d8bef9SDimitry Andric
38e8d8bef9SDimitry Andric//===----------------------------------------------------------------------===//
39e8d8bef9SDimitry Andric// Define each kind of processor resource and number available.
40e8d8bef9SDimitry Andric
41e8d8bef9SDimitry Andric// Modeling each pipeline as a ProcResource using the BufferSize = 0 since the
42e8d8bef9SDimitry Andric// Cortex-A55 is in-order.
43e8d8bef9SDimitry Andric
// All pipelines below share BufferSize = 0.
let BufferSize = 0 in {
  def CortexA55UnitALU   : ProcResource<2>; // Int ALU
  def CortexA55UnitMAC   : ProcResource<1>; // Int MAC, 64-bit wide
  def CortexA55UnitDiv   : ProcResource<1>; // Int Division, not pipelined
  def CortexA55UnitLd    : ProcResource<1>; // Load pipe
  def CortexA55UnitSt    : ProcResource<1>; // Store pipe
  def CortexA55UnitB     : ProcResource<1>; // Branch

  // The FP DIV/SQRT instructions execute totally differently from the FP ALU
  // instructions, which can mostly be dual-issued; that's why for now we
  // model them with 2 resources.
  def CortexA55UnitFPALU : ProcResource<2>; // FP ALU
  def CortexA55UnitFPMAC : ProcResource<2>; // FP MAC
  def CortexA55UnitFPDIV : ProcResource<1>; // FP Div/SQRT, 64/128
}
57e8d8bef9SDimitry Andric
58e8d8bef9SDimitry Andric//===----------------------------------------------------------------------===//
59e8d8bef9SDimitry Andric// Subtarget-specific SchedWrite types
60e8d8bef9SDimitry Andric
61e8d8bef9SDimitry Andriclet SchedModel = CortexA55Model in {
62e8d8bef9SDimitry Andric
63e8d8bef9SDimitry Andric// These latencies are modeled without taking into account forwarding paths
64e8d8bef9SDimitry Andric// (the software optimisation guide lists latencies taking into account
65e8d8bef9SDimitry Andric// typical forwarding paths).
// Integer ALU writes: every one uses the ALU resource with a latency of 3.
let Latency = 3 in {
  def : WriteRes<WriteImm,   [CortexA55UnitALU]>; // MOVN, MOVZ
  def : WriteRes<WriteI,     [CortexA55UnitALU]>; // ALU
  def : WriteRes<WriteISReg, [CortexA55UnitALU]>; // ALU of Shifted-Reg
  def : WriteRes<WriteIEReg, [CortexA55UnitALU]>; // ALU of Extended-Reg
  def : WriteRes<WriteExtr,  [CortexA55UnitALU]>; // EXTR from a reg pair
  def : WriteRes<WriteIS,    [CortexA55UnitALU]>; // Shift/Scale
}

// MAC
let Latency = 4 in {
  def : WriteRes<WriteIM32, [CortexA55UnitMAC]>; // 32-bit Multiply
  def : WriteRes<WriteIM64, [CortexA55UnitMAC]>; // 64-bit Multiply
}

// Div: the divider is held for the full 8-cycle latency.
let Latency = 8, ReleaseAtCycles = [8] in {
  def : WriteRes<WriteID32, [CortexA55UnitDiv]>;
  def : WriteRes<WriteID64, [CortexA55UnitDiv]>;
}
84e8d8bef9SDimitry Andric
// Load
def : WriteRes<WriteLD, [CortexA55UnitLd]> {
  let Latency = 3;
}
def : WriteRes<WriteLDIdx, [CortexA55UnitLd]> {
  let Latency = 4;
}
def : WriteRes<WriteLDHi, [CortexA55UnitLd]> {
  let Latency = 5;
}
89e8d8bef9SDimitry Andric
// Vector Load - Vector loads take 1-5 cycles to issue. For WriteVLD below,
//               the median of 3 is chosen, which makes the latency 6.
// An extra cycle is needed to get the swizzling right.
def : WriteRes<WriteVLD, [CortexA55UnitLd]> {
  let Latency = 6;
  let ReleaseAtCycles = [3];
}

// Latency 4 on the load pipe; no explicit ReleaseAtCycles.
def CortexA55WriteVLD1 : SchedWriteRes<[CortexA55UnitLd]> {
  let Latency = 4;
}
// As CortexA55WriteVLD1, but flagged SingleIssue.
def CortexA55WriteVLD1SI : SchedWriteRes<[CortexA55UnitLd]> {
  let Latency = 4;
  let SingleIssue = 1;
}

// Defines CortexA55WriteVLD2 .. CortexA55WriteVLD8. CortexA55WriteVLD<N> has
// ReleaseAtCycles = [N] on the load pipe and a latency of N + 3.
foreach n = [2, 3, 4, 5, 6, 7, 8] in
  def CortexA55WriteVLD#n : SchedWriteRes<[CortexA55UnitLd]> {
    let Latency = !add(n, 3);
    let ReleaseAtCycles = [n];
  }
111e8d8bef9SDimitry Andric
// Writes for the second result of load-pair instructions.
def CortexA55WriteLDP1 : SchedWriteRes<[]> {
  let Latency = 4;
}
def CortexA55WriteLDP2 : SchedWriteRes<[CortexA55UnitLd]> {
  let Latency = 5;
}
def CortexA55WriteLDP4 : SchedWriteRes<[CortexA55UnitLd,
                                        CortexA55UnitLd,
                                        CortexA55UnitLd,
                                        CortexA55UnitLd,
                                        CortexA55UnitLd]> {
  let Latency = 6;
}

// Pre/Post Indexing - Performed as part of address generation
def : WriteRes<WriteAdr, []> {
  let Latency = 0;
}
118e8d8bef9SDimitry Andric
// Store
let Latency = 1, RetireOOO = 1 in {
  def : WriteRes<WriteST,    [CortexA55UnitSt]>;
  def : WriteRes<WriteSTP,   [CortexA55UnitSt]>;
  def : WriteRes<WriteSTIdx, [CortexA55UnitSt]>;
}
// WriteSTX is not part of the RetireOOO group above.
def : WriteRes<WriteSTX, [CortexA55UnitSt]> {
  let Latency = 4;
}
126e8d8bef9SDimitry Andric
// Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
def : WriteRes<WriteVST, [CortexA55UnitSt]> {
  let Latency = 5;
  let ReleaseAtCycles = [2];
}
def CortexA55WriteVST1 : SchedWriteRes<[CortexA55UnitSt]> {
  let Latency = 4;
}
def CortexA55WriteVST2 : SchedWriteRes<[CortexA55UnitSt]> {
  let Latency = 5;
  let ReleaseAtCycles = [2];
}
def CortexA55WriteVST3 : SchedWriteRes<[CortexA55UnitSt]> {
  let Latency = 6;
  let ReleaseAtCycles = [3];
}
// NOTE(review): the latency here is lower than CortexA55WriteVST3's even
// though the store pipe is held longer - confirm against the optimisation
// guide.
def CortexA55WriteVST4 : SchedWriteRes<[CortexA55UnitSt]> {
  let Latency = 5;
  let ReleaseAtCycles = [4];
}
137e8d8bef9SDimitry Andric
// WriteAtomic is marked as unsupported in this model.
def : WriteRes<WriteAtomic, []> {
  let Unsupported = 1;
}

// Branch: branches, system instructions, barriers and hints all use the
// branch unit with the default WriteRes settings.
foreach w = [WriteBr, WriteBrReg, WriteSys, WriteBarrier, WriteHint] in
  def : WriteRes<w, [CortexA55UnitB]>;
146e8d8bef9SDimitry Andric
// FP ALU
//   As the WriteF result is produced in F5 and it can mostly be forwarded
//   to the consumer at F1, the effective latency is set to 4.
let Latency = 4 in {
  def : WriteRes<WriteF,    [CortexA55UnitFPALU]>;
  def : WriteRes<WriteFCvt, [CortexA55UnitFPALU]>;
}
let Latency = 3 in {
  def : WriteRes<WriteFCmp,  [CortexA55UnitFPALU]>;
  def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]>;
  def : WriteRes<WriteFImm,  [CortexA55UnitFPALU]>;
}
15581ad6265SDimitry Andric
15681ad6265SDimitry Andric// NEON
// Write that uses one unit of `res` with latency `n`. Used in this file for
// the 64-bit vector forms.
class CortexA55WriteVd<int n, ProcResourceKind res>
    : SchedWriteRes<[res]> {
  let Latency = n;
}

// Write that uses two units of `res` with latency `n` and begins a new issue
// group. Used in this file for the 128-bit vector forms.
class CortexA55WriteVq<int n, ProcResourceKind res>
    : SchedWriteRes<[res, res]> {
  let BeginGroup = 1;
  let Latency = n;
}
// Named NEON writes; the numeric suffix is the latency. All of them use the
// FP ALU resource.

// Vd forms.
def CortexA55WriteDotVd_4   : CortexA55WriteVd<4, CortexA55UnitFPALU>;
def CortexA55WriteMlaVd_4   : CortexA55WriteVd<4, CortexA55UnitFPALU>;
def CortexA55WriteAluVd_3   : CortexA55WriteVd<3, CortexA55UnitFPALU>;
def CortexA55WriteAluVd_2   : CortexA55WriteVd<2, CortexA55UnitFPALU>;
def CortexA55WriteAluVd_1   : CortexA55WriteVd<1, CortexA55UnitFPALU>;

// Vq forms.
def CortexA55WriteDotScVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
def CortexA55WriteDotVq_4   : CortexA55WriteVq<4, CortexA55UnitFPALU>;
def CortexA55WriteMlaLVq_4  : CortexA55WriteVq<4, CortexA55UnitFPALU>;
def CortexA55WriteMlaIxVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
def CortexA55WriteMlaVq_4   : CortexA55WriteVq<4, CortexA55UnitFPALU>;
def CortexA55WriteAluVq_4   : CortexA55WriteVq<4, CortexA55UnitFPALU>;
def CortexA55WriteAluVq_3   : CortexA55WriteVq<3, CortexA55UnitFPALU>;
def CortexA55WriteAluVq_2   : CortexA55WriteVq<2, CortexA55UnitFPALU>;
def CortexA55WriteAluVq_1   : CortexA55WriteVq<1, CortexA55UnitFPALU>;

// The generic WriteVd / WriteVq are aliased to the latency-4 FP ALU writes.
def : SchedAlias<WriteVd, CortexA55WriteVd<4, CortexA55UnitFPALU>>;
def : SchedAlias<WriteVq, CortexA55WriteVq<4, CortexA55UnitFPALU>>;
180e8d8bef9SDimitry Andric
// FP ALU specific new schedwrite definitions.
// Defines CortexA55WriteFPALU_F2, _F3, _F4 and _F5; the numeric suffix is the
// latency.
foreach n = [2, 3, 4, 5] in
  def CortexA55WriteFPALU_F#n : SchedWriteRes<[CortexA55UnitFPALU]> {
    let Latency = n;
  }
186e8d8bef9SDimitry Andric
// FP Mul, Div, Sqrt. Div/Sqrt are not pipelined
def : WriteRes<WriteFMul, [CortexA55UnitFPMAC]> {
  let Latency = 4;
}

let RetireOOO = 1 in {
  // NOTE(review): ReleaseAtCycles exceeds Latency for the generic WriteFDiv,
  // unlike the per-precision writes below - confirm this is intended.
  def : WriteRes<WriteFDiv, [CortexA55UnitFPDIV]> {
    let Latency = 22;
    let ReleaseAtCycles = [29];
  }

  def CortexA55WriteFMAC : SchedWriteRes<[CortexA55UnitFPMAC]> {
    let Latency = 4;
  }

  // Divide, by precision (half / single / double).
  def CortexA55WriteFDivHP : SchedWriteRes<[CortexA55UnitFPDIV]> {
    let Latency = 8;
    let ReleaseAtCycles = [5];
  }
  def CortexA55WriteFDivSP : SchedWriteRes<[CortexA55UnitFPDIV]> {
    let Latency = 13;
    let ReleaseAtCycles = [10];
  }
  def CortexA55WriteFDivDP : SchedWriteRes<[CortexA55UnitFPDIV]> {
    let Latency = 22;
    let ReleaseAtCycles = [19];
  }

  // Square root, by precision (half / single / double).
  def CortexA55WriteFSqrtHP : SchedWriteRes<[CortexA55UnitFPDIV]> {
    let Latency = 8;
    let ReleaseAtCycles = [5];
  }
  def CortexA55WriteFSqrtSP : SchedWriteRes<[CortexA55UnitFPDIV]> {
    let Latency = 12;
    let ReleaseAtCycles = [9];
  }
  def CortexA55WriteFSqrtDP : SchedWriteRes<[CortexA55UnitFPDIV]> {
    let Latency = 22;
    let ReleaseAtCycles = [19];
  }
}
207e8d8bef9SDimitry Andric//===----------------------------------------------------------------------===//
208e8d8bef9SDimitry Andric// Subtarget-specific SchedRead types.
209e8d8bef9SDimitry Andric
// Vector load operands get no read advance.
def : ReadAdvance<ReadVLD, 0>;

// These operands are read one cycle late.
foreach r = [ReadExtrHi, ReadAdrBase, ReadST] in
  def : ReadAdvance<r, 1>;
214e8d8bef9SDimitry Andric
// The integer writes to which the read advances below apply.
defvar CortexA55IntFwdWrites = [WriteImm, WriteI,
                                WriteISReg, WriteIEReg, WriteIS,
                                WriteID32, WriteID64,
                                WriteIM32, WriteIM64];

// ALU - ALU input operands are generally needed in EX1. An operand produced
//       in, say, EX2 can be forwarded for consumption to ALU in EX1, thereby
//       allowing back-to-back ALU operations such as add. If an operand
//       requires a shift, it will, however, be required in ISS stage.
def : ReadAdvance<ReadI, 2, CortexA55IntFwdWrites>;

// Shifted operand
def CortexA55ReadShifted    : SchedReadAdvance<1, CortexA55IntFwdWrites>;
def CortexA55ReadNotShifted : SchedReadAdvance<2, CortexA55IntFwdWrites>;

// ReadISReg: pick the read advance depending on whether the register operand
// is shifted.
def CortexA55ReadISReg : SchedReadVariant<[
  SchedVar<RegShiftedPred, [CortexA55ReadShifted]>,
  SchedVar<NoSchedPred,    [CortexA55ReadNotShifted]>
]>;
def : SchedAlias<ReadISReg, CortexA55ReadISReg>;

// ReadIEReg: same selection, keyed on whether the register operand is
// extended.
def CortexA55ReadIEReg : SchedReadVariant<[
  SchedVar<RegExtendedPred, [CortexA55ReadShifted]>,
  SchedVar<NoSchedPred,     [CortexA55ReadNotShifted]>
]>;
def : SchedAlias<ReadIEReg, CortexA55ReadIEReg>;

// MUL
def : ReadAdvance<ReadIM,  1, CortexA55IntFwdWrites>;
def : ReadAdvance<ReadIMA, 2, CortexA55IntFwdWrites>;

// Div
def : ReadAdvance<ReadID, 1, CortexA55IntFwdWrites>;
257e8d8bef9SDimitry Andric
258e8d8bef9SDimitry Andric//===----------------------------------------------------------------------===//
259e8d8bef9SDimitry Andric// Subtarget-specific InstRWs.
260e8d8bef9SDimitry Andric
261e8d8bef9SDimitry Andric//---
262e8d8bef9SDimitry Andric// Miscellaneous
263e8d8bef9SDimitry Andric//---
// Load pair, no writeback: one write per destination register.
def : InstRW<[CortexA55WriteVLD1SI, CortexA55WriteLDP1],
             (instregex "LDPS?Wi")>;
def : InstRW<[CortexA55WriteVLD1, CortexA55WriteLDP1],
             (instregex "LDPSi")>;
def : InstRW<[CortexA55WriteVLD1, CortexA55WriteLDP2],
             (instregex "LDP(X|D)i")>;
def : InstRW<[CortexA55WriteVLD1, CortexA55WriteLDP4],
             (instregex "LDPQi")>;

// Load pair with pre/post-index writeback: WriteAdr comes first.
def : InstRW<[WriteAdr, CortexA55WriteVLD1SI, CortexA55WriteLDP1],
             (instregex "LDPS?W(pre|post)")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD1, CortexA55WriteLDP1],
             (instregex "LDPS(pre|post)")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD1, CortexA55WriteLDP2],
             (instregex "LDP(X|D)(pre|post)")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD1, CortexA55WriteLDP4],
             (instregex "LDPQ(pre|post)")>;

// COPY is scheduled as a plain integer ALU write.
def : InstRW<[WriteI], (instrs COPY)>;
273e8d8bef9SDimitry Andric//---
274e8d8bef9SDimitry Andric// Vector Loads - 64-bit per cycle
275e8d8bef9SDimitry Andric//---
//   1-element structures
// Entries that share a write list are folded into a single InstRW.
def : InstRW<[CortexA55WriteVLD1],
             (instregex "LD1i(8|16|32|64)$",                 // single element
                        "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$",  // replicate
                        "LD1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[CortexA55WriteVLD2],
             (instregex "LD1Onev(16b|8h|4s|2d)$",
                        "LD1Twov(8b|4h|2s|1d)$")>;           // multiple structures
def : InstRW<[CortexA55WriteVLD3],
             (instregex "LD1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[CortexA55WriteVLD4],
             (instregex "LD1Twov(16b|8h|4s|2d)$",
                        "LD1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[CortexA55WriteVLD6],
             (instregex "LD1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVLD8],
             (instregex "LD1Fourv(16b|8h|4s|2d)$")>;

// Post-indexed forms: same load writes, with WriteAdr first.
def : InstRW<[WriteAdr, CortexA55WriteVLD1],
             (instregex "LD1i(8|16|32|64)_POST$",
                        "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$",
                        "LD1Onev(8b|4h|2s|1d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD2],
             (instregex "LD1Onev(16b|8h|4s|2d)_POST$",
                        "LD1Twov(8b|4h|2s|1d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD3],
             (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD4],
             (instregex "LD1Twov(16b|8h|4s|2d)_POST$",
                        "LD1Fourv(8b|4h|2s|1d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD6],
             (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD8],
             (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
298e8d8bef9SDimitry Andric
//    2-element structures
def : InstRW<[CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)$")>;
def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;

// Post-indexed forms: same load writes, with WriteAdr first. The patterns
// require the _POST suffix so that they do not also match the non-writeback
// opcodes handled above; an optional suffix would give those opcodes a second,
// conflicting InstRW that goes undiagnosed while FullInstRWOverlapCheck is 0.
def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
309e8d8bef9SDimitry Andric
//    3-element structures
def : InstRW<[CortexA55WriteVLD2],
             (instregex "LD3i(8|16|32|64)$",
                        "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVLD3],
             (instregex "LD3Threev(8b|4h|2s|1d)$")>;
def : InstRW<[CortexA55WriteVLD6],
             (instregex "LD3Threev(16b|8h|4s|2d)$")>;

// Post-indexed forms: same load writes, with WriteAdr first.
def : InstRW<[WriteAdr, CortexA55WriteVLD2],
             (instregex "LD3i(8|16|32|64)_POST$",
                        "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD3],
             (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD6],
             (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
320e8d8bef9SDimitry Andric
//    4-element structures
def : InstRW<[CortexA55WriteVLD2],
             (instregex
                // load single 4-el structure to one lane of 4 regs.
                "LD4i(8|16|32|64)$",
                // load single 4-el structure, replicate to all lanes of 4 regs.
                "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
// load multiple 4-el structures to 4 regs.
def : InstRW<[CortexA55WriteVLD4],
             (instregex "LD4Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[CortexA55WriteVLD8],
             (instregex "LD4Fourv(16b|8h|4s|2d)$")>;

// Post-indexed forms: same load writes, with WriteAdr first.
def : InstRW<[WriteAdr, CortexA55WriteVLD2],
             (instregex "LD4i(8|16|32|64)_POST$",
                        "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD4],
             (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD8],
             (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
331e8d8bef9SDimitry Andric
332e8d8bef9SDimitry Andric//---
333e8d8bef9SDimitry Andric// Vector Stores
334e8d8bef9SDimitry Andric//---
// ST1: single-lane, one- and two-register forms share CortexA55WriteVST1.
def : InstRW<[CortexA55WriteVST1],
             (instregex "ST1i(8|16|32|64)$",
                        "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$",
                        "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVST2],
             (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVST4],
             (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;

// Post-indexed forms: same store writes, with WriteAdr first.
def : InstRW<[WriteAdr, CortexA55WriteVST1],
             (instregex "ST1i(8|16|32|64)_POST$",
                        "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$",
                        "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVST2],
             (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVST4],
             (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
345e8d8bef9SDimitry Andric
// ST2
def : InstRW<[CortexA55WriteVST2],
             (instregex "ST2i(8|16|32|64)$",
                        "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[CortexA55WriteVST4],
             (instregex "ST2Twov(16b|8h|4s|2d)$")>;

// Post-indexed forms: same store writes, with WriteAdr first.
def : InstRW<[WriteAdr, CortexA55WriteVST2],
             (instregex "ST2i(8|16|32|64)_POST$",
                        "ST2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVST4],
             (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
352e8d8bef9SDimitry Andric
// ST3
def : InstRW<[CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
def : InstRW<[CortexA55WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;

// Post-indexed forms: same store writes, with WriteAdr first. The arrangement
// list is kept identical to the non-writeback pattern above.
def : InstRW<[WriteAdr, CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
357e8d8bef9SDimitry Andric
// ST4
def : InstRW<[CortexA55WriteVST2],
             (instregex "ST4i(8|16|32|64)$")>;
def : InstRW<[CortexA55WriteVST4],
             (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;

// Post-indexed forms: same store writes, with WriteAdr first.
def : InstRW<[WriteAdr, CortexA55WriteVST2],
             (instregex "ST4i(8|16|32|64)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVST4],
             (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
362e8d8bef9SDimitry Andric
363e8d8bef9SDimitry Andric//---
364e8d8bef9SDimitry Andric// Floating Point Conversions, MAC, DIV, SQRT
365e8d8bef9SDimitry Andric//---
// Latency 2: DUP of the listed vector types and XTN.
def : InstRW<[CortexA55WriteFPALU_F2],
             (instregex "^DUP(v2i64|v4i32|v8i16|v16i8)",
                        "^XTN")>;

// Latency 3: FCVT* opcodes matched by the W/X-suffixed pattern below.
def : InstRW<[CortexA55WriteFPALU_F3],
             (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>;

// Latency 4: FCVT* "v" opcodes and all [SU]CVTF forms matched below.
def : InstRW<[CortexA55WriteFPALU_F4],
             (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v",
                        "^(S|U)CVTF(S|U)(W|X)(H|S|D)",
                        "^(S|U)CVTF(h|s|d)",
                        "^(S|U)CVTFv")>;
374e8d8bef9SDimitry Andric
// Fused multiply-add/subtract and multiply-accumulate.
def : InstRW<[CortexA55WriteFMAC],
             (instregex "^FN?M(ADD|SUB).*",
                        "^FML(A|S).*")>;

// Scalar divide, by precision.
def : InstRW<[CortexA55WriteFDivHP], (instrs FDIVHrr)>;
def : InstRW<[CortexA55WriteFDivSP], (instrs FDIVSrr)>;
def : InstRW<[CortexA55WriteFDivDP], (instrs FDIVDrr)>;

// Vector divide, selected by the element-size suffix of the opcode name.
def : InstRW<[CortexA55WriteFDivHP],
             (instregex "^FDIVv.*16$")>;
def : InstRW<[CortexA55WriteFDivSP],
             (instregex "^FDIVv.*32$")>;
def : InstRW<[CortexA55WriteFDivDP],
             (instregex "^FDIVv.*64$")>;

// Square root, selected by the element-size suffix of the opcode name.
// NOTE(review): these patterns match any opcode that contains "SQRT" and ends
// in 16/32/64, so presumably the reciprocal-square-root estimate/step opcodes
// are matched as well, while opcodes without such a suffix are not matched
// here - confirm this is intended.
def : InstRW<[CortexA55WriteFSqrtHP],
             (instregex "^.*SQRT.*16$")>;
def : InstRW<[CortexA55WriteFSqrtSP],
             (instregex "^.*SQRT.*32$")>;
def : InstRW<[CortexA55WriteFSqrtDP],
             (instregex "^.*SQRT.*64$")>;
386fe6060f1SDimitry Andric
38781ad6265SDimitry Andric// 4.15. Advanced SIMD integer instructions
// Convention used below: where an operation has two InstRW entries, the
// 64-bit arrangements (1i64|2i32|4i16|8i8) get a CortexA55WriteAluVd_N write
// and the 128-bit arrangements (2i64|4i32|8i16|16i8) get the matching
// CortexA55WriteAluVq_N write. Operations with a single entry (accumulate,
// long, wide, narrowing and reduction forms) use a Vq write for every
// arrangement. The _N suffix is presumably the latency in cycles -- confirm
// against the write definitions earlier in the file.
38881ad6265SDimitry Andric// ASIMD absolute diff
38981ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]ABDv(2i32|4i16|8i8)")>;
39081ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]ABDv(16i8|4i32|8i16)")>;
39181ad6265SDimitry Andric// ASIMD absolute diff accum
39281ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU]ABAL?v")>;
39381ad6265SDimitry Andric// ASIMD absolute diff long
39481ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]ABDLv")>;
39581ad6265SDimitry Andric// ASIMD arith #1
39681ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVd_2], (instregex "(ADD|SUB|NEG)v(1i64|2i32|4i16|8i8)",
39781ad6265SDimitry Andric  "[SU]R?HADDv(2i32|4i16|8i8)", "[SU]HSUBv(2i32|4i16|8i8)")>;
39881ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_2], (instregex "(ADD|SUB|NEG)v(2i64|4i32|8i16|16i8)",
39981ad6265SDimitry Andric  "[SU]R?HADDv(8i16|4i32|16i8)", "[SU]HSUBv(8i16|4i32|16i8)")>;
40081ad6265SDimitry Andric// ASIMD arith #2
40181ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVd_3], (instregex "ABSv(1i64|2i32|4i16|8i8)$",
40281ad6265SDimitry Andric  "[SU]ADDLPv(2i32_v1i64|4i16_v2i32|8i8_v4i16)$",
40381ad6265SDimitry Andric  "([SU]QADD|[SU]QSUB|SQNEG|SUQADD|USQADD)v(1i16|1i32|1i64|1i8|2i32|4i16|8i8)$",
40481ad6265SDimitry Andric  "ADDPv(2i32|4i16|8i8)$")>;
40581ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_3], (instregex "ABSv(2i64|4i32|8i16|16i8)$",
40681ad6265SDimitry Andric  "[SU]ADDLPv(16i8_v8i16|4i32_v2i64|8i16_v4i32)$",
40781ad6265SDimitry Andric  "([SU]QADD|[SU]QSUB|SQNEG|SUQADD|USQADD)v(16i8|2i64|4i32|8i16)$",
40881ad6265SDimitry Andric  "ADDPv(16i8|2i64|4i32|8i16)$")>;
40981ad6265SDimitry Andric// ASIMD arith #3
41081ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_3], (instregex  "SADDLv", "UADDLv", "SADDWv",
41181ad6265SDimitry Andric  "UADDWv", "SSUBLv", "USUBLv", "SSUBWv", "USUBWv", "ADDHNv", "SUBHNv")>;
// NOTE(review): the numbering goes from #3 to #5 with no #4 in this file --
// presumably it mirrors the numbering of an external reference; confirm.
41281ad6265SDimitry Andric// ASIMD arith #5
41381ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_4], (instregex "RADDHNv", "RSUBHNv")>;
41481ad6265SDimitry Andric// ASIMD arith, reduce
41581ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_3], (instregex  "ADDVv", "SADDLVv", "UADDLVv")>;
41681ad6265SDimitry Andric// ASIMD compare #1
41781ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVd_2], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(1i64|2i32|4i16|8i8)")>;
41881ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_2], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(2i64|4i32|8i16|16i8)")>;
41981ad6265SDimitry Andric// ASIMD compare #2
42081ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVd_3], (instregex "CMTSTv(1i64|2i32|4i16|8i8)")>;
42181ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_3], (instregex "CMTSTv(2i64|4i32|8i16|16i8)")>;
42281ad6265SDimitry Andric// ASIMD logical #1
// 64-bit forms use CortexA55WriteAluVd_1, 128-bit forms CortexA55WriteAluVq_1.
// The MVNI alternatives "2i"/"2s" and "4s" are partial arrangement names with
// no trailing "$"; presumably they rely on the regex not being anchored at the
// end of the opcode name -- confirm against instregex matching rules.
42381ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVd_1], (instregex "(AND|EOR|NOT|ORN)v8i8",
42481ad6265SDimitry Andric  "(ORR|BIC)v(2i32|4i16|8i8)$", "MVNIv(2i|2s|4i16)")>;
42581ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_1], (instregex "(AND|EOR|NOT|ORN)v16i8",
42681ad6265SDimitry Andric  "(ORR|BIC)v(16i8|4i32|8i16)$", "MVNIv(4i32|4s|8i16)")>;
42781ad6265SDimitry Andric// ASIMD max/min, basic
// [SU](MIN|MAX) and the pairwise [SU](MIN|MAX)P forms: 64-bit arrangements use
// CortexA55WriteAluVd_2, 128-bit arrangements use CortexA55WriteAluVq_2. The
// 4 x i32 arrangement is spelled "4i32" so that the v4i32 opcodes are matched
// by the Vq entry alongside v16i8 and v8i16.
42881ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVd_2], (instregex "[SU](MIN|MAX)P?v(2i32|4i16|8i8)")>;
42981ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_2], (instregex "[SU](MIN|MAX)P?v(16i8|4i32|8i16)")>;
43081ad6265SDimitry Andric// ASIMD max/min, reduce
// Across-vector [SU](MAX|MIN)V reductions use one Vq write for all arrangements.
43181ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU](MAX|MIN)Vv")>;
// Multiply, multiply-accumulate, dot-product and shift-accumulate mappings.
// ML[AS], [SU]ML[AS]L and [SU]DOT use dedicated CortexA55WriteMla*/Dot* writes
// instead of the generic CortexA55WriteAlu* ones (presumably to model
// accumulator-operand behaviour -- confirm against their definitions).
43281ad6265SDimitry Andric// ASIMD multiply, by element
43381ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_4], (instregex "MULv(2i32|4i16|4i32|8i16)_indexed$",
43481ad6265SDimitry Andric  "SQR?DMULHv(1i16|1i32|2i32|4i16|4i32|8i16)_indexed$")>;
// NOTE(review): only the polynomial PMUL opcodes are listed here; no pattern in
// the visible part of this section names the non-indexed MULv* opcodes --
// confirm whether they are covered elsewhere or fall back to a default write.
43581ad6265SDimitry Andric// ASIMD multiply
43681ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVd_3], (instrs PMULv8i8)>;
43781ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_3], (instrs PMULv16i8)>;
43881ad6265SDimitry Andric// ASIMD multiply accumulate
43981ad6265SDimitry Andricdef : InstRW<[CortexA55WriteMlaVd_4], (instregex "ML[AS]v(2i32|4i16|8i8)$")>;
44081ad6265SDimitry Andricdef : InstRW<[CortexA55WriteMlaVq_4], (instregex "ML[AS]v(16i8|4i32|8i16)$")>;
44181ad6265SDimitry Andricdef : InstRW<[CortexA55WriteMlaIxVq_4], (instregex "ML[AS]v(2i32|4i16|4i32|8i16)_indexed$")>;
44281ad6265SDimitry Andric// ASIMD multiply accumulate half
44381ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_4], (instregex "SQRDML[AS]H[vi]")>;
44481ad6265SDimitry Andric// ASIMD multiply accumulate long
44581ad6265SDimitry Andricdef : InstRW<[CortexA55WriteMlaLVq_4], (instregex "[SU]ML[AS]Lv")>;
44681ad6265SDimitry Andric// ASIMD multiply accumulate long #2
44781ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_4], (instregex "SQDML[AS]L[iv]")>;
44881ad6265SDimitry Andric// ASIMD dot product
44981ad6265SDimitry Andricdef : InstRW<[CortexA55WriteDotVd_4], (instregex "[SU]DOTv8i8")>;
45081ad6265SDimitry Andricdef : InstRW<[CortexA55WriteDotVq_4], (instregex "[SU]DOTv16i8")>;
45181ad6265SDimitry Andric// ASIMD dot product, by scalar
45281ad6265SDimitry Andricdef : InstRW<[CortexA55WriteDotScVq_4], (instregex "[SU]DOTlanev")>;
45381ad6265SDimitry Andric// ASIMD multiply long
45481ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU]MULLv", "SQDMULL[iv]")>;
45581ad6265SDimitry Andric// ASIMD polynomial (8x8) multiply long
45681ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_3], (instrs PMULLv8i8, PMULLv16i8)>;
45781ad6265SDimitry Andric// ASIMD pairwise add and accumulate
45881ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU]ADALPv")>;
45981ad6265SDimitry Andric// ASIMD shift accumulate
// The scalar "d" form is grouped with the 64-bit vector arrangements.
46081ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]SRA(d|v2i32|v4i16|v8i8)")>;
46181ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]SRAv(16i8|2i64|4i32|8i16)")>;
46281ad6265SDimitry Andric// ASIMD shift accumulate #2
46381ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU]RSRA[vd]")>;
46481ad6265SDimitry Andric// ASIMD shift by immed
// NOTE(review): unlike the neighbouring groups there is no Vd/Vq split here;
// every matching arrangement, 128-bit ones included, gets
// CortexA55WriteAluVd_2 -- confirm this is intended.
46581ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVd_2], (instregex "SHLd$", "SHLv",
46681ad6265SDimitry Andric  "SLId$", "SRId$", "[SU]SHR[vd]", "SHRNv")>;
46781ad6265SDimitry Andric// ASIMD shift by immed, long
46881ad6265SDimitry Andric// SXTL and UXTL are aliases for SSHLL and USHLL
46981ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_2], (instregex "[US]?SHLLv")>;
47081ad6265SDimitry Andric// ASIMD shift by immed #2
// Rounding shifts: the scalar "d" form is grouped with the 64-bit arrangements.
47181ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]RSHR(d|v2i32|v4i16|v8i8)",
47281ad6265SDimitry Andric  "RSHRNv(2i32|4i16|8i8)")>;
47381ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]RSHRv(16i8|2i64|4i32|8i16)",
47481ad6265SDimitry Andric  "RSHRNv(16i8|4i32|8i16)")>;
47581ad6265SDimitry Andric// ASIMD shift by register
47681ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVd_2], (instregex "[SU]SHLv(1i64|2i32|4i16|8i8)")>;
47781ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_2], (instregex "[SU]SHLv(2i64|4i32|8i16|16i8)")>;
47881ad6265SDimitry Andric// ASIMD shift by register #2
47981ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]RSHLv(1i64|2i32|4i16|8i8)")>;
48081ad6265SDimitry Andricdef : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]RSHLv(2i64|4i32|8i16|16i8)")>;
48181ad6265SDimitry Andric
482e8d8bef9SDimitry Andric}
483