1//==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
11//
12//===----------------------------------------------------------------------===//
13
14// ===---------------------------------------------------------------------===//
// The Cortex-R52 is an in-order, pipelined, superscalar microprocessor with
// an 8-stage pipeline. It can issue at most two instructions per cycle.
// There are two ALUs, one LDST, one MUL and a non-pipelined integer DIV.
// A number of forwarding paths enable results of computations to be input
// to subsequent operations before they are written to registers.
// This scheduler is a MachineScheduler. See TargetSchedule.td for details.
21
// Machine model record for the Cortex-R52.
def CortexR52Model : SchedMachineModel {
  // R52 is an in-order processor, so no micro-op buffering.
  let MicroOpBufferSize = 0;
  // Two micro-ops are dispatched per cycle.
  let IssueWidth = 2;
  // Optimistic value that assumes no misses.
  let LoadLatency = 1;
  // Cost of a branch direction mispredict, including PFU.
  let MispredictPenalty = 8;
  // Covers the instructions applicable to cortex-r52.
  let CompleteModel = 0;
}
29
30
31//===----------------------------------------------------------------------===//
32// Define each kind of processor resource and number available.
33
// Each pipeline is modelled as a ProcResource. All of them use
// BufferSize = 0 because the Cortex-R52 is an in-order processor.
let BufferSize = 0 in {
  def R52UnitALU   : ProcResource<2>; // Int ALU
  def R52UnitMAC   : ProcResource<1>; // Int MAC
  def R52UnitDiv   : ProcResource<1>; // Int Division
  def R52UnitLd    : ProcResource<1>; // Load/Store
  def R52UnitB     : ProcResource<1>; // Branch
  def R52UnitFPALU : ProcResource<2>; // FP ALU
  def R52UnitFPMUL : ProcResource<2>; // FP MUL
  def R52UnitFPDIV : ProcResource<1>; // FP DIV
}
45
// SchedReads specific to the Cortex-R52: R52Read_ISS, R52Read_EX1,
// R52Read_EX2, R52Read_WRI, R52Read_F0, R52Read_F1 and R52Read_F2.
// F0 maps to the ISS stage of the integer pipe.
foreach Stage = ["ISS", "EX1", "EX2", "WRI", "F0", "F1", "F2"] in
  def R52Read_#Stage : SchedRead;
54
55
56//===----------------------------------------------------------------------===//
57// Subtarget-specific SchedWrite types which map ProcResources and set latency.
58
59let SchedModel = CortexR52Model in {
60
// ALU - the write occurs in Late EX2, whether or not a shift was required,
// so every generic ALU write shares the same latency.
let Latency = 3 in {
  def : WriteRes<WriteALU,    [R52UnitALU]>;
  def : WriteRes<WriteALUsi,  [R52UnitALU]>;
  def : WriteRes<WriteALUsr,  [R52UnitALU]>;
  def : WriteRes<WriteALUSsr, [R52UnitALU]>;
}
66
// Compares - use an ALU and are modelled with zero latency.
let Latency = 0 in {
  def : WriteRes<WriteCMP,   [R52UnitALU]>;
  def : WriteRes<WriteCMPsi, [R52UnitALU]>;
  def : WriteRes<WriteCMPsr, [R52UnitALU]>;
}
71
72// Multiply - aliased to sub-target specific later
73
// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2).
// The divider is non-pipelined, so it stays reserved for the whole latency.
let Latency = 8, ResourceCycles = [8] in
  def : WriteRes<WriteDIV, [R52UnitDiv]>;
78
// Branches - LR written in Late EX2.
// Plain and linking branches use the branch unit; table branches use an ALU.
let Latency = 0 in {
  def : WriteRes<WriteBr,    [R52UnitB]>;
  def : WriteRes<WriteBrL,   [R52UnitB]>;
  def : WriteRes<WriteBrTbl, [R52UnitALU]>;
}

// Misc - a no-op uses no resource and issues no micro-op.
let Latency = 0, NumMicroOps = 0 in
  def : WriteRes<WriteNoop, []>;
86
// Integer pipeline bypasses for the generic SchedReads.
def : ReadAdvance<ReadALU,   1>; // operand needed in the EX1 stage
def : ReadAdvance<ReadALUsr, 0>; // shift operands needed in ISS
def : ReadAdvance<ReadMUL,   0>;
def : ReadAdvance<ReadMAC,   0>;
92
// Floating-point. Map the target-defined SchedReadWrites to the subtarget.
// The 64-bit forms list each FP unit twice.
let Latency = 6 in {
  def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]>;
  def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]>;
}

// A multiply-accumulate is internally two insns (MUL then ADD).
let Latency = 11 in {
  def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]>;
  def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
                                R52UnitFPALU, R52UnitFPALU]>;
}
108
// FP divide takes a fixed number of cycles and is not pipelined, so the
// unit is reserved for the whole latency.
let Latency = 7, ResourceCycles = [7] in
  def : WriteRes<WriteFPDIV32, [R52UnitDiv]>;
let Latency = 17, ResourceCycles = [17] in
  def : WriteRes<WriteFPDIV64, [R52UnitDiv]>;

// FP square root has the same latencies as the matching divide.
// NOTE(review): unlike the divides, these do not override ResourceCycles,
// and all four entries use R52UnitDiv rather than R52UnitFPDIV - confirm
// both are intended.
let Latency = 7 in
  def : WriteRes<WriteFPSQRT32, [R52UnitDiv]>;
let Latency = 17 in
  def : WriteRes<WriteFPSQRT64, [R52UnitDiv]>;
121
// Overridden via InstRW for this processor, so these generic vector-store
// writes are given no resources here.
foreach VSTWrite = [WriteVST1, WriteVST2, WriteVST3, WriteVST4] in
  def : WriteRes<VSTWrite, []>;
127
// FP bypasses for the generic SchedReads.
def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1

//===----------------------------------------------------------------------===//
// Subtarget-specific SchedReadWrites.

// Forwarding information, based on when an operand is read. The advance
// grows by one cycle per stage: ISS/F0 = 0, EX1/F1 = 1, EX2/F2 = 2.
def : ReadAdvance<R52Read_ISS, 0>;
def : ReadAdvance<R52Read_EX1, 1>;
def : ReadAdvance<R52Read_EX2, 2>;
def : ReadAdvance<R52Read_F0,  0>;
def : ReadAdvance<R52Read_F1,  1>;
def : ReadAdvance<R52Read_F2,  2>;
141
142
// Cortex-R52 specific SchedWrites for use with InstRW.

// Integer multiply/accumulate; the "Hi" half adds no micro-op of its own.
let Latency = 4 in {
  def R52WriteMAC   : SchedWriteRes<[R52UnitMAC]>;
  def R52WriteMACHi : SchedWriteRes<[R52UnitMAC]> { let NumMicroOps = 0; }
}

// Integer divide: not pipelined, so the unit is held for all 8 cycles.
let Latency = 8, ResourceCycles = [8] in
  def R52WriteDIV : SchedWriteRes<[R52UnitDiv]>;

// Loads and stores share the single load/store unit.
let Latency = 4 in {
  def R52WriteLd : SchedWriteRes<[R52UnitLd]>;
  def R52WriteST : SchedWriteRes<[R52UnitLd]>;
}

// Address write-back and condition-code results: no resource, no latency.
let Latency = 0 in {
  def R52WriteAdr : SchedWriteRes<[]>;
  def R52WriteCC  : SchedWriteRes<[]>;
}

// ALU results; the suffix names the stage and the latency grows with it.
def R52WriteALU_EX1 : SchedWriteRes<[R52UnitALU]> { let Latency = 2; }
def R52WriteALU_EX2 : SchedWriteRes<[R52UnitALU]> { let Latency = 3; }
def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }

// The same EX2/WRI latencies without reserving any resource.
def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; }
def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; }
161
// Alias the generic SchedWrites to the sub-target specific ones.
// Multiplies and multiply-accumulates all go to the MAC write; the high
// half of a 64-bit result goes to R52WriteMACHi.
def : SchedAlias<WriteMUL16,   R52WriteMAC>;
def : SchedAlias<WriteMUL32,   R52WriteMAC>;
def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>;
def : SchedAlias<WriteMAC16,   R52WriteMAC>;
def : SchedAlias<WriteMAC32,   R52WriteMAC>;
def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>;
// Memory accesses; a preload is treated like a load.
def : SchedAlias<WritePreLd,   R52WriteLd>;
def : SchedAlias<WriteLd,      R52WriteLd>;
def : SchedAlias<WriteST,      R52WriteST>;
174
// FP SchedWrites. The _F<n> suffix names the FP stage; the "2" variants
// list each unit twice.
let Latency = 4 in {
  def R52WriteFPALU_F3  : SchedWriteRes<[R52UnitFPALU]>;
  def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]>;
}
let Latency = 5 in {
  def R52WriteFPALU_F4  : SchedWriteRes<[R52UnitFPALU]>;
  def R52Write2FPALU_F4 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]>;
}
let Latency = 6 in {
  def R52WriteFPALU_F5  : SchedWriteRes<[R52UnitFPALU]>;
  def R52Write2FPALU_F5 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]>;
  def R52WriteFPMUL_F5  : SchedWriteRes<[R52UnitFPMUL]>;
  def R52Write2FPMUL_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]>;
}
// A multiply-accumulate is internally two insns (MUL then ADD).
let Latency = 11 in {
  def R52WriteFPMAC_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]>;
  def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
                                         R52UnitFPALU, R52UnitFPALU]>;
}

// FP loads and stores go through the load/store unit.
let Latency = 5 in {
  def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]>;
  def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]>;
}
201
202//===----------------------------------------------------------------------===//
203// Floating-point. Map target defined SchedReadWrites to processor specific ones
204//
// Conversions and FP ALU operations use the F5 write; FP moves use the
// shorter F3 write.
def : SchedAlias<WriteFPCVT,   R52WriteFPALU_F5>;
def : SchedAlias<WriteFPMOV,   R52WriteFPALU_F3>;
def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;
209
210//===----------------------------------------------------------------------===//
211// Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types.
212//
// Register copies use the generic ALU write.
def : InstRW<[WriteALU], (instrs COPY)>;

// Sign/zero extends.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
             (instregex "SXTB", "SXTH", "SXTB16",
                        "UXTB", "UXTH", "UXTB16",
                        "t2SXTB", "t2SXTH", "t2SXTB16",
                        "t2UXTB", "t2UXTH", "t2UXTB16")>;

// Immediate moves.
def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
             (instregex "MOVCCi32imm", "MOVi32imm",
                        "t2MOVCCi", "t2MOVi")>;
// PC-relative global-address pseudos: the plain form is an ALU op, the
// _ldr form is scheduled as a load.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
             (instregex "MOV_ga_pcrel$")>;
def : InstRW<[R52WriteLd, R52Read_ISS],
             (instregex "MOV_ga_pcrel_ldr")>;

// Select.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "SEL", "t2SEL")>;

// Bit-field clear/insert/extract.
def : InstRW<[R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
             (instregex "BFC", "BFI", "UBFX", "SBFX",
                        "(t|t2)BFC", "(t|t2)BFI",
                        "(t|t2)UBFX", "(t|t2)SBFX")>;
231
// Saturating arithmetic - result available in WRI.
def : InstRW<[R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
             (instregex "QADD", "QSUB", "QDADD", "QDSUB",
                        "SSAT", "SSAT16", "USAT",
                        "QADD8", "QADD16", "QSUB8", "QSUB16",
                        "QASX", "QSAX",
                        "UQADD8", "UQADD16", "UQSUB8", "UQSUB16",
                        "UQASX", "UQSAX",
                        "t2QADD", "t2QSUB", "t2QDADD", "t2QDSUB",
                        "t2SSAT", "t2SSAT16", "t2USAT",
                        "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16",
                        "t2QASX", "t2QSAX",
                        "t2UQADD8", "t2UQADD16", "t2UQSUB8", "t2UQSUB16",
                        "t2UQASX", "t2UQSAX",
                        "t2ABS")>;

// Parallel arithmetic - result available in EX2.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "SADD8", "SADD16", "SSUB8", "SSUB16",
                        "SASX", "SSAX",
                        "UADD8", "UADD16", "USUB8", "USUB16",
                        "UASX", "USAX",
                        "t2SADD8", "t2SADD16", "t2SSUB8", "t2SSUB16",
                        "t2SASX", "t2SSAX",
                        "t2UADD8", "t2UADD16", "t2USUB8", "t2USUB16",
                        "t2UASX", "t2USAX")>;

// Flag setting.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16",
                        "SHASX", "SHSAX",
                        "SXTAB", "SXTAB16", "SXTAH",
                        "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
                        "UHASX", "UHSAX",
                        "UXTAB", "UXTAB16", "UXTAH",
                        "t2SHADD8", "t2SHADD16", "t2SHSUB8", "t2SHSUB16",
                        "t2SHASX", "t2SHSAX",
                        "t2SXTAB", "t2SXTAB16", "t2SXTAH",
                        "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16",
                        "t2UHASX", "t2UHSAX",
                        "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;

// Sum of Absolute Difference - all three operands are read in ISS.
def : InstRW<[R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
             (instregex "USAD8", "t2USAD8", "USADA8", "t2USADA8")>;
260
// Integer Multiply - two source operands, both read in ISS.
def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
             (instregex "MUL", "SMMUL", "SMMULR",
                        "SMULBB", "SMULBT", "SMULTB", "SMULTT",
                        "SMULWB", "SMULWT", "SMUSD", "SMUSDX",
                        "t2MUL", "t2SMMUL", "t2SMMULR",
                        "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
                        "t2SMULWB", "t2SMULWT", "t2SMUSD")>;

// Multiply Accumulate
// Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
// The store pipeline is used partly for 64-bit operations.
def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
             (instregex "MLA", "MLS",
                        "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
                        "t2MLA", "t2MLS",
                        "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
                        "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
                        "SMLABB", "SMLABT", "SMLATB", "SMLATT",
                        "SMLSD", "SMLSDX", "SMLAWB", "SMLAWT",
                        "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
                        "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
                        "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
                        "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
                        "SMLAL", "UMLAL", "SMLALBT",
                        "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX",
                        "SMLSLD", "SMLSLDX",
                        "UMAAL", "t2SMLAL", "t2UMLAL",
                        "t2SMLALBT", "t2SMLALTB", "t2SMLALTT",
                        "t2SMLALD", "t2SMLALDX",
                        "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;

// Integer divide.
def : InstRW<[R52WriteDIV, R52Read_ISS, R52Read_ISS],
             (instregex "t2SDIV", "t2UDIV")>;
288
// Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
// However, that's non-trivial to specify, so we keep it uniform.
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
             (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$",
                        "t2LDR(i8|i12|s|pci)", "t2LDR(H|B)(i8|i12|s|pci)",
                        "LDREX", "t2LDREX",
                        "tLDR[BH](r|i|spi|pci|pciASM)",
                        "tLDR(r|i|spi|pci|pciASM)",
                        "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
                        "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)",
                        "LDRS(H|B)$",
                        "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$",
                        "LDA", "t2LDA")>;

// Pre/post-indexed loads also produce the address write (R52WriteAdr).
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
             (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)",
                        "LDRH(_PRE|_POST)",
                        "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)",
                        "LDRHT(i|r)",
                        "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
                        "LDR(SH|SB)(_POST|_PRE)",
                        "t2LDR(SH|SB)(_POST|_PRE)",
                        "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)?",
                        "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
305
// Register-shifted moves and move-top.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
             (instregex "MOVS?sr", "t2MOVS?sr")>;
def : InstRW<[R52WriteALU_WRI, R52Read_EX2],
             (instregex "MOVT", "t2MOVT")>;

// ALU operations with an immediate operand (one register read).
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
             (instregex "AD(C|D)S?ri", "ANDS?ri", "BICS?ri", "CLZ",
                        "EORri", "MVNS?r", "ORRri", "RSBS?ri",
                        "RSCri", "SBCri",
                        "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri", "t2CLZ",
                        "t2EORri", "t2MVN", "t2ORRri", "t2RSBS?ri",
                        "t2SBCri")>;

// Register-register ALU operations (two register reads in EX1).
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "AD(C|D)S?rr", "ANDS?rr", "BICS?rr", "CRC",
                        "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
                        "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC",
                        "t2EORrr", "t2SBCrr")>;

// ALU operations with an immediate-shifted register; the shifted operand
// is read in ISS.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
             (instregex "AD(C|D)S?rsi", "ANDS?rsi", "BICS?rsi",
                        "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
                        "t2AD(C|D)S?rs", "t2ANDS?rs", "t2BICS?rs",
                        "t2EORrs", "t2ORRrs", "t2RSBrs", "t2SBCrs")>;

// ALU operations with a register-shifted register; two operands are read
// in ISS.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
             (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr",
                        "EORrsr", "MVNS?sr", "ORRrsr", "RSBrsr",
                        "RSCrsr", "SBCrsr")>;

// Entries with no SchedRead listed.
def : InstRW<[R52WriteALU_EX1],
             (instregex "ADR", "MOVsi", "MVNS?s?i", "t2MOVS?si")>;

// Shifts and rotates, by immediate and by register.
def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
             (instregex "ASRi", "RORS?i")>;
def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
             (instregex "ASRr", "RORS?r", "LSR", "LSL")>;
332
// Compares write only the condition codes (R52WriteCC). The read list
// follows the operand form: immediate, register, shifted register.
def : InstRW<[R52WriteCC, R52Read_EX1],
             (instregex "CMPri", "CMNri")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1],
             (instregex "CMPrr", "CMNzrr")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS],
             (instregex "CMPrsi", "CMNzrsi")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS],
             (instregex "CMPrsr", "CMNzrsr")>;

// Bit/byte reversal, RRX and t2LDC: operand read in ISS.
def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
             (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;

def : InstRW<[R52WriteCC, R52Read_ISS],
             (instregex "TST")>;

// Status-register moves are scheduled with the load write.
def : InstRW<[R52WriteLd],
             (instregex "MRS", "MRSbanked")>;
def : InstRW<[R52WriteLd, R52Read_EX1],
             (instregex "MSR", "MSRbanked")>;
345
// Integer Load, Multiple.
// For each latency 3..25 define two writes:
//   R52WriteILDM<Lat>Cy   - reserves the load/store unit
//   R52WriteILDM<Lat>CyNo - latency only: no resource and no micro-op
foreach Lat = 3-25 in {
  def R52WriteILDM#Lat#Cy   : SchedWriteRes<[R52UnitLd]> { let Latency = Lat; }
  def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {
    let NumMicroOps = 0;
    let Latency = Lat;
  }
}

// R52ILDMAddr<N>Pred holds when TII->getNumLDMAddresses(*MI) equals N.
foreach NAddr = 1-16 in {
  def R52ILDMAddr#NAddr#Pred :
    SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;
}

// Address writes, without and with write-back.
def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; }
def R52WriteILDMAddrWB   : SchedWriteRes<[R52UnitLd]>;
// Variadic write for integer load-multiple.
//
// The variant is selected by the R52ILDMAddr<N>Pred predicates, i.e. by the
// value of TII->getNumLDMAddresses(*MI). For N addresses the list holds N
// writes, R52WriteILDM4Cy .. R52WriteILDM<N+3>Cy, so each further register
// is one cycle later than the previous one.
//
// Predicates are tried in order, so each address count must appear exactly
// once. The 16-address entry previously reused R52ILDMAddr15Pred, which
// made it unreachable and sent 16-register loads to the NoSchedPred
// fallback; it now uses R52ILDMAddr16Pred.
def R52WriteILDM : SchedWriteVariant<[
    SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>,

    SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy]>,
    SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy]>,

    SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy]>,
    SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy, R52WriteILDM9Cy]>,

    SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy, R52WriteILDM9Cy,
                                 R52WriteILDM10Cy]>,
    SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy, R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy]>,

    SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy, R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy]>,
    SchedVar<R52ILDMAddr10Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy, R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy]>,

    SchedVar<R52ILDMAddr11Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy, R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy,
                                 R52WriteILDM14Cy]>,
    SchedVar<R52ILDMAddr12Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy, R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy,
                                 R52WriteILDM14Cy, R52WriteILDM15Cy]>,

    SchedVar<R52ILDMAddr13Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy, R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy,
                                 R52WriteILDM14Cy, R52WriteILDM15Cy,
                                 R52WriteILDM16Cy]>,
    SchedVar<R52ILDMAddr14Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy, R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy,
                                 R52WriteILDM14Cy, R52WriteILDM15Cy,
                                 R52WriteILDM16Cy, R52WriteILDM17Cy]>,

    SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy, R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy,
                                 R52WriteILDM14Cy, R52WriteILDM15Cy,
                                 R52WriteILDM16Cy, R52WriteILDM17Cy,
                                 R52WriteILDM18Cy]>,
    // 16 addresses: must test R52ILDMAddr16Pred, not a second Addr15Pred.
    SchedVar<R52ILDMAddr16Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                                 R52WriteILDM6Cy, R52WriteILDM7Cy,
                                 R52WriteILDM8Cy, R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy,
                                 R52WriteILDM14Cy, R52WriteILDM15Cy,
                                 R52WriteILDM16Cy, R52WriteILDM17Cy,
                                 R52WriteILDM18Cy, R52WriteILDM19Cy]>,

    // Unknown number of registers, just use resources for two registers.
    // The remaining writes are the latency-only "CyNo" forms.
    // NOTE(review): the last two entries are the resource-consuming "Cy"
    // forms rather than "CyNo", which does not match the comment above -
    // confirm whether R52WriteILDM18CyNo/R52WriteILDM19CyNo were intended.
    SchedVar<NoSchedPred,      [R52WriteILDM4Cy, R52WriteILDM5Cy,
                                R52WriteILDM6CyNo, R52WriteILDM7CyNo,
                                R52WriteILDM8CyNo, R52WriteILDM9CyNo,
                                R52WriteILDM10CyNo, R52WriteILDM11CyNo,
                                R52WriteILDM12CyNo, R52WriteILDM13CyNo,
                                R52WriteILDM14CyNo, R52WriteILDM15CyNo,
                                R52WriteILDM16CyNo, R52WriteILDM17CyNo,
                                R52WriteILDM18Cy, R52WriteILDM19Cy]>
]> { let Variadic=1; }
451
// Integer Store, Multiple.
// One store step: two micro-ops on the load/store unit.
def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 2;
  let Latency = 4;
}

// R52WriteISTM<N> repeats that step N times, for N = 1..16.
foreach NumAddr = 1-16 in {
  def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;
}
// Store-multiple write: picks R52WriteISTM<N> when the matching
// R52ILDMAddr<N>Pred predicate holds (N = 2..16).
def R52WriteISTM : SchedWriteVariant<[
    SchedVar<R52ILDMAddr2Pred,  [R52WriteISTM2]>,
    SchedVar<R52ILDMAddr3Pred,  [R52WriteISTM3]>,
    SchedVar<R52ILDMAddr4Pred,  [R52WriteISTM4]>,
    SchedVar<R52ILDMAddr5Pred,  [R52WriteISTM5]>,
    SchedVar<R52ILDMAddr6Pred,  [R52WriteISTM6]>,
    SchedVar<R52ILDMAddr7Pred,  [R52WriteISTM7]>,
    SchedVar<R52ILDMAddr8Pred,  [R52WriteISTM8]>,
    SchedVar<R52ILDMAddr9Pred,  [R52WriteISTM9]>,
    SchedVar<R52ILDMAddr10Pred, [R52WriteISTM10]>,
    SchedVar<R52ILDMAddr11Pred, [R52WriteISTM11]>,
    SchedVar<R52ILDMAddr12Pred, [R52WriteISTM12]>,
    SchedVar<R52ILDMAddr13Pred, [R52WriteISTM13]>,
    SchedVar<R52ILDMAddr14Pred, [R52WriteISTM14]>,
    SchedVar<R52ILDMAddr15Pred, [R52WriteISTM15]>,
    SchedVar<R52ILDMAddr16Pred, [R52WriteISTM16]>,
    // Unknown number of registers, just use resources for two registers.
    SchedVar<NoSchedPred,       [R52WriteISTM2]>
]>;
479
// Load-multiple without base update.
def : InstRW<[R52WriteILDM, R52Read_ISS],
             (instregex "LDM(IA|DA|DB|IB)$",
                        "t2LDM(IA|DA|DB|IB)$",
                        "(t|sys)LDM(IA|DA|DB|IB)$")>;
// Load-multiple with base update: adds the address write.
def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
             (instregex "LDM(IA|DA|DB|IB)_UPD",
                        "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
// Return forms and tPOP use the same writes as the updating forms.
def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
             (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "tPOP")>;
487
// Integer Store, Single Element.
// The first operand is read in ISS and the second in EX2.
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
             (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$",
                        "STREX", "SRS", "t2SRS", "t2SRSDB",
                        "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH",
                        "t2STR(i12|i8|s)$",
                        "RFE", "t2RFE",
                        "t2STR[BH](i12|i8|s)$",
                        "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;

// Pre/post-indexed single stores: add the address write.
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
             (instregex
               "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
               "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx",
               "STR(H_|HT_)(PRE|POST)",
               "STR(BT|HT|T)",
               "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
               "t2STR_preidx", "t2STR[BH]_preidx",
               "t2ST(RB|RH|R)T")>;

// Integer Store, Dual.
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
             (instregex "STRD$", "t2STRDi8", "STL", "t2STL")>;
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
             (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;

// Store-multiple, without and with base update (tPUSH is in the latter).
def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
             (instregex "STM(IB|IA|DB|DA)$",
                        "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
             (instregex "STM(IB|IA|DB|DA)_UPD",
                        "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
                        "tPUSH")>;
511
// LDRLIT pseudo instructions expand to LDR + PICADD; they are scheduled
// with the load write.
def : InstRW<[R52WriteLd],
             (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel$")>;
// LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR.
def : InstRW<[R52WriteLd],
             (instregex "LDRLIT_ga_pcrel_ldr")>;
517
518
519
520//===----------------------------------------------------------------------===//
521// VFP, Floating Point Support
// Absolute difference: D-register forms use one FP ALU entry, Q-register
// forms use the doubled write.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VABD(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VABD(fq|hq)")>;

// Absolute value.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VABS(D|S|H)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VABS(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1],
             (instregex "VABS(fq|hq)")>;

// Absolute compares produce their result earlier (F3).
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "(VACGE|VACGT)(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "(VACGE|VACGT)(fq|hq)")>;

// Add/subtract.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "(VADD|VSUB)(D|S|H|fd|hd)$")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "(VADD|VSUB)(fq|hq)")>;

// FP register load/store.
def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1],
             (instregex "VLDR")>;
def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1],
             (instregex "VSTR")>;
537
538
539//===----------------------------------------------------------------------===//
540// Neon Support
541
542// vector multiple load stores
// R52LMAddrPred<N> holds when the instruction has exactly N operands.
foreach NumAddr = 1-16 in {
  def R52LMAddrPred#NumAddr :
    SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
}

// R52WriteLM<Lat>Cy: latency-only write, no resource.
foreach Lat = 1-32 in {
  def R52WriteLM#Lat#Cy : SchedWriteRes<[]> { let Latency = Lat; }
}

// R52ReserveLd<Num>Cy: reserve the LdSt resource for Num cycles with Num
// micro-ops and zero latency, so there is no dual-issue.
foreach Num = 1-32 in {
  def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
    let ResourceCycles = [Num];
    let NumMicroOps = Num;
    let Latency = 0;
  }
}
// Variadic write for vector load-multiple.
//
// The variant is selected by the R52LMAddrPred<N> predicates, i.e. by
// MI->getNumOperands(). Operand counts are handled in pairs, one pair per
// number of D registers. For k D registers the list holds the latency-only
// writes R52WriteLM5Cy .. R52WriteLM<k+4>Cy followed by
// R52ReserveLd<k+4>Cy, which reserves the load/store unit.
//
// Two entries broke that pattern and are corrected here:
//  * R52LMAddrPred5 reserved R52ReserveLd4Cy while its twin R52LMAddrPred6
//    reserved R52ReserveLd7Cy; both now use R52ReserveLd7Cy.
//  * The "8 D reg" pair tested predicates 14 and 15. The 14 entry was
//    unreachable (14 is already matched under "7 D reg") and 16 operands
//    were never matched. The pair now tests 15 and 16, as R52WriteSTM does.
def R52WriteVLDM : SchedWriteVariant<[
  // 1 D reg
  SchedVar<R52LMAddrPred1,  [R52WriteLM5Cy,
                              R52ReserveLd5Cy]>,
  SchedVar<R52LMAddrPred2,  [R52WriteLM5Cy,
                              R52ReserveLd5Cy]>,

  // 2 D reg
  SchedVar<R52LMAddrPred3,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52ReserveLd6Cy]>,
  SchedVar<R52LMAddrPred4,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52ReserveLd6Cy]>,

  // 3 D reg
  SchedVar<R52LMAddrPred5,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,
                              R52ReserveLd7Cy]>,
  SchedVar<R52LMAddrPred6,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,
                              R52ReserveLd7Cy]>,

  // 4 D reg
  SchedVar<R52LMAddrPred7,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52ReserveLd8Cy]>,
  SchedVar<R52LMAddrPred8,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52ReserveLd8Cy]>,

  // 5 D reg
  SchedVar<R52LMAddrPred9,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy,
                              R52ReserveLd9Cy]>,
  SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy,
                              R52ReserveLd9Cy]>,

  // 6 D reg
  SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52ReserveLd10Cy]>,
  SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52ReserveLd10Cy]>,

  // 7 D reg
  SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52WriteLM11Cy,
                              R52ReserveLd11Cy]>,
  SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52WriteLM11Cy,
                              R52ReserveLd11Cy]>,

  // 8 D reg
  SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52WriteLM11Cy, R52WriteLM12Cy,
                              R52ReserveLd12Cy]>,
  SchedVar<R52LMAddrPred16, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52WriteLM11Cy, R52WriteLM12Cy,
                              R52ReserveLd12Cy]>,
  // unknown number of reg.
  SchedVar<NoSchedPred,      [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52WriteLM11Cy, R52WriteLM12Cy,
                              R52ReserveLd5Cy]>
]> { let Variadic=1;}
638
// Variable stores. Cannot dual-issue.
// Each step up adds one cycle of latency, two micro-ops and one more cycle
// on the load/store unit.
def R52WriteSTM5  : SchedWriteRes<[R52UnitLd]> {
  let Latency = 5;  let NumMicroOps = 2;  let ResourceCycles = [1];
}
def R52WriteSTM6  : SchedWriteRes<[R52UnitLd]> {
  let Latency = 6;  let NumMicroOps = 4;  let ResourceCycles = [2];
}
def R52WriteSTM7  : SchedWriteRes<[R52UnitLd]> {
  let Latency = 7;  let NumMicroOps = 6;  let ResourceCycles = [3];
}
def R52WriteSTM8  : SchedWriteRes<[R52UnitLd]> {
  let Latency = 8;  let NumMicroOps = 8;  let ResourceCycles = [4];
}
def R52WriteSTM9  : SchedWriteRes<[R52UnitLd]> {
  let Latency = 9;  let NumMicroOps = 10; let ResourceCycles = [5];
}
def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 10; let NumMicroOps = 12; let ResourceCycles = [6];
}
def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 11; let NumMicroOps = 14; let ResourceCycles = [7];
}
def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 12; let NumMicroOps = 16; let ResourceCycles = [8];
}
def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 13; let NumMicroOps = 18; let ResourceCycles = [9];
}
def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 14; let NumMicroOps = 20; let ResourceCycles = [10];
}
def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 15; let NumMicroOps = 22; let ResourceCycles = [11];
}
695
// Store-multiple cost selection. The variants are tried in order; each
// consecutive pair of R52LMAddrPred<N> predicates steps up to the next
// R52WriteSTM<N> resource (Pred1/Pred2 -> STM5 ... Pred15/Pred16 -> STM12).
// NOTE(review): the predicates are defined outside this section; presumably
// they test the number of registers in the list - confirm against their
// definition.
def R52WriteSTM : SchedWriteVariant<[
  SchedVar<R52LMAddrPred1,  [R52WriteSTM5]>,  SchedVar<R52LMAddrPred2,  [R52WriteSTM5]>,
  SchedVar<R52LMAddrPred3,  [R52WriteSTM6]>,  SchedVar<R52LMAddrPred4,  [R52WriteSTM6]>,
  SchedVar<R52LMAddrPred5,  [R52WriteSTM7]>,  SchedVar<R52LMAddrPred6,  [R52WriteSTM7]>,
  SchedVar<R52LMAddrPred7,  [R52WriteSTM8]>,  SchedVar<R52LMAddrPred8,  [R52WriteSTM8]>,
  SchedVar<R52LMAddrPred9,  [R52WriteSTM9]>,  SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,
  SchedVar<R52LMAddrPred11, [R52WriteSTM10]>, SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,
  SchedVar<R52LMAddrPred13, [R52WriteSTM11]>, SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,
  SchedVar<R52LMAddrPred15, [R52WriteSTM12]>, SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,
  // Register count not known: fall back to the R52WriteSTM6 cost.
  SchedVar<NoSchedPred,     [R52WriteSTM6]>
]>;
716
// Vector loads/stores issue only in slot-0. Another instruction may be
// dual-issued alongside them in slot-1, but only together with the last
// issue of the vector access.
//
// All vector-load writes use the load/store pipe (R52UnitLd). WriteVLD1 keeps
// the default micro-op and resource-cycle counts; WriteVLD2..4 are multi-uop
// and are marked SingleIssue.
let Latency = 5 in
def : WriteRes<WriteVLD1, [R52UnitLd]>;

let SingleIssue = 1 in {
  let Latency = 6, NumMicroOps = 3, ResourceCycles = [2] in
  def : WriteRes<WriteVLD2, [R52UnitLd]>;

  let Latency = 7, NumMicroOps = 5, ResourceCycles = [3] in
  def : WriteRes<WriteVLD3, [R52UnitLd]>;

  let Latency = 8, NumMicroOps = 7, ResourceCycles = [4] in
  def : WriteRes<WriteVLD4, [R52UnitLd]>;
}
// Write resources for vector stores, all on the load/store pipe (R52UnitLd).
// R52WriteVST<K>Mem has Latency = K + 4, NumMicroOps = 2 * K - 1 and holds
// the pipe for K cycles.
let Latency = 5, NumMicroOps = 1, ResourceCycles = [1] in
def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]>;

let Latency = 6, NumMicroOps = 3, ResourceCycles = [2] in
def R52WriteVST2Mem : SchedWriteRes<[R52UnitLd]>;

let Latency = 7, NumMicroOps = 5, ResourceCycles = [3] in
def R52WriteVST3Mem : SchedWriteRes<[R52UnitLd]>;

let Latency = 8, NumMicroOps = 7, ResourceCycles = [4] in
def R52WriteVST4Mem : SchedWriteRes<[R52UnitLd]>;

let Latency = 9, NumMicroOps = 9, ResourceCycles = [5] in
def R52WriteVST5Mem : SchedWriteRes<[R52UnitLd]>;
763
764
// VABA / VABAL: absolute difference and accumulate (three source reads).
// The 64-bit vector forms use R52WriteFPALU_F5; the 128-bit and widening
// forms use R52Write2FPALU_F5.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;

// VABD / VABDL: absolute difference (two source reads).
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
// The widening VABDL produces v8i16/v4i32/v2i64 results, exactly like VABAL
// above. The previous pattern listed the non-widening 128-bit suffixes
// (v16i8|v8i16|v4i32), which left the v2i64 form without an entry.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VABDL(u|s)(v8i16|v4i32|v2i64)")>;
772
// Per-instruction scheduling overrides for NEON/FP arithmetic and logical
// operations. Each entry lists the write resource followed by one read per
// source operand; the instregex strings select the opcodes by name.

// Absolute value (128-bit forms).
def : InstRW<[R52Write2FPALU_F4, R52Read_F1],
             (instregex "VABS(v16i8|v8i16|v4i32)")>;

// Integer add / subtract, including the narrowing "high half" forms.
def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;

// Long and wide add / subtract.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;

// Bitwise logic, register forms.
def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2],
             (instregex "(VAND|VBIC|VEOR)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2],
             (instregex "(VAND|VBIC|VEOR)q")>;

// Bit clear with immediate.
def : InstRW<[R52WriteFPALU_F3, R52Read_F2],
             (instregex "VBICi(v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2],
             (instregex "VBICi(v8i16|v4i32)")>;

// Bitwise insert / select.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2],
             (instregex "(VBIF|VBIT|VBSL)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2],
             (instregex "(VBIF|VBIT|VBSL)q")>;

// Compares and bit counts, then conversions.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VCVT", "VSITO", "VUITO", "VTO")>;

// Duplicate from a core register (VDUP) or from a vector lane (VDUPLN).
def : InstRW<[R52WriteFPALU_F3, R52Read_ISS],
             (instregex "VDUP(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_ISS],
             (instregex "VDUP(8|16|32)q")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1],
             (instregex "VDUPLN(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1],
             (instregex "VDUPLN(8|16|32)q")>;

// Vector extract and select.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VEXTd(8|16|32)", "VSEL")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VEXTq(8|16|32|64)")>;

// Fused multiply-accumulate / multiply-subtract.
def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "(VFMA|VFMS)(f|h)d")>;
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "(VFMA|VFMS)(f|h)q")>;

// Halving add / subtract.
def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;
812
// Remaining per-instruction overrides: VFP load-multiple, min/max, logical,
// status-register move, negate, pairwise and saturating operations, shifts,
// table lookups and permutes.
def : InstRW<[R52WriteVLDM],
             (instregex "VLDM[SD](IA|DB)$")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VORR", "VORN", "VREV")>;
def : InstRW<[R52WriteNoRSRC_WRI],
             (instregex "VMRS")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VNEG")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VPADDi")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VPADAL", "VPADDL")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1],
             (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VQDMLAL", "VQDMLSL")>;
def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VQDMUL","VQRDMUL")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
832
//---
// VSTx. Vector Stores
//---
// 1-element structure stores. Forms without address writeback list only the
// memory write; writeback forms additionally list R52WriteAdr.

// Whole-register stores, no writeback.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1q(8|16|32|64)$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)T$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Q$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64TPseudo$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64QPseudo$")>;

// Single-lane stores, no writeback.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNq(8|16|32)Pseudo$")>;

// Whole-register stores with address writeback.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1q(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Twb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Qwb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64TPseudoWB")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64QPseudoWB")>;

// Single-lane stores with address writeback.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNq(8|16|32)Pseudo_UPD")>;
858
// 2-element structure stores. Forms without address writeback list only the
// memory write; writeback forms additionally list R52WriteAdr.

// Whole-register stores, no writeback.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2(d|b)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)Pseudo$")>;

// Single-lane stores, no writeback.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)Pseudo$")>;

// Whole-register stores with address writeback.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2(d|b)(8|16|32)wb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)wb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)PseudoWB")>;

// Single-lane stores with address writeback.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)Pseudo_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)Pseudo_UPD")>;
881
// 3-element structure stores. Forms without address writeback list only the
// memory write; writeback forms additionally list R52WriteAdr.

// Whole-register stores, no writeback.
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)Asm_(8|16|32)$")>;
// Accept both d and q pseudos here, mirroring the writeback pattern below.
// The "odd" pseudos are q-named, so the former "VST3d..." prefix could never
// reach the oddPseudo alternative and left those opcodes without an entry.
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;

// Single-lane stores, no writeback.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)Pseudo$")>;

// Whole-register stores with address writeback.
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)_UPD$")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;

// Single-lane stores with address writeback.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)Pseudo_UPD$")>;
904
// 4-element structure stores. Forms without address writeback list only the
// memory write; writeback forms additionally list R52WriteAdr.

// Whole-register stores, no writeback.
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)Asm_(8|16|32)$")>;
// Accept d and q pseudos, including the q-named "odd" pseudos, mirroring the
// writeback pattern below. The former "VST4d(8|16|32)Pseudo$" pattern left
// the oddPseudo opcodes without an entry.
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;

// Single-lane stores, no writeback.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)Pseudo$")>;

// Whole-register stores with address writeback.
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// Single-lane stores with address writeback.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)Pseudo_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)Pseudo_UPD")>;
927
928} // R52 SchedModel
929