1//=- X86ScheduleZnver2.td - X86 Znver2 Scheduling -------------*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the machine model for Znver2 to support instruction
10// scheduling and other instruction cost heuristics.
11//
12//===----------------------------------------------------------------------===//
13
// Machine model record for Znver2. The scheduling definitions later in this
// file are bound to it through `let SchedModel = Znver2Model in { ... }`.
14def Znver2Model : SchedMachineModel {
15  // Zen can decode 4 instructions per cycle.
16  let IssueWidth = 4;
17  // MicroOpBufferSize is sized after the reorder buffer.
18  let MicroOpBufferSize = 224;
19  let LoadLatency = 4;
20  let MispredictPenalty = 17;
21  let HighLatency = 25;
22  let PostRAScheduler = 1;
23
24  // FIXME: This setting is required because the model is incomplete.
25  // Not every instruction has scheduling information yet,
26  // so we clear this variable to
27  // mark the model as incomplete.
28  let CompleteModel = 0;
29}
30
31let SchedModel = Znver2Model in {
32
33// Zen can issue micro-ops to 11 different units in one cycle.
34// These are
35//  * Four integer ALU units (Zn2ALU0, Zn2ALU1, Zn2ALU2, Zn2ALU3)
36//  * Three AGU units (Zn2AGU0, Zn2AGU1, Zn2AGU2)
37//  * Four FPU units (Zn2FPU0, Zn2FPU1, Zn2FPU2, Zn2FPU3)
38// The AGUs feed the load/store queues at two loads and one store per cycle.
39
40// Four ALU units are defined below
41def Zn2ALU0 : ProcResource<1>;
42def Zn2ALU1 : ProcResource<1>;
43def Zn2ALU2 : ProcResource<1>;
44def Zn2ALU3 : ProcResource<1>;
45
46// Three AGU units are defined below
47def Zn2AGU0 : ProcResource<1>;
48def Zn2AGU1 : ProcResource<1>;
49def Zn2AGU2 : ProcResource<1>;
50
51// Four FPU units are defined below
52def Zn2FPU0 : ProcResource<1>;
53def Zn2FPU1 : ProcResource<1>;
54def Zn2FPU2 : ProcResource<1>;
55def Zn2FPU3 : ProcResource<1>;
56
57// FPU grouping
// Each group lets a micro-op be issued to any one of its member FPU pipes; the
// digits in a group's name are the numbers of the pipes it contains.
58def Zn2FPU013  : ProcResGroup<[Zn2FPU0, Zn2FPU1, Zn2FPU3]>;
59def Zn2FPU01   : ProcResGroup<[Zn2FPU0, Zn2FPU1]>;
60def Zn2FPU12   : ProcResGroup<[Zn2FPU1, Zn2FPU2]>;
61def Zn2FPU13   : ProcResGroup<[Zn2FPU1, Zn2FPU3]>;
62def Zn2FPU23   : ProcResGroup<[Zn2FPU2, Zn2FPU3]>;
63def Zn2FPU02   : ProcResGroup<[Zn2FPU0, Zn2FPU2]>;
64def Zn2FPU03   : ProcResGroup<[Zn2FPU0, Zn2FPU3]>;
65
66// Below are the grouping of the units.
67// Micro-ops to be issued to multiple units are tackled this way.
68
69// ALU grouping
70// Zn2ALU03 - 0,3 grouping
71def Zn2ALU03: ProcResGroup<[Zn2ALU0, Zn2ALU3]>;
72
73// 64 Entry (16x4 entries) Int Scheduler
74def Zn2ALU : ProcResGroup<[Zn2ALU0, Zn2ALU1, Zn2ALU2, Zn2ALU3]> {
75  let BufferSize=64;
76}
77
78// 28 Entry (14x2) AGU group. AGUs can't be used for all ALU operations
79// but are relevant for some instructions
80def Zn2AGU : ProcResGroup<[Zn2AGU0, Zn2AGU1, Zn2AGU2]> {
81  let BufferSize=28;
82}
83
84// Integer Multiplication issued on ALU1.
85def Zn2Multiplier : ProcResource<1>;
86
87// Integer division issued on ALU2.
88def Zn2Divider : ProcResource<1>;
89
90// 4 Cycles load-to use Latency is captured
91def : ReadAdvance<ReadAfterLd, 4>;
92
93// 7 Cycles vector load-to use Latency is captured
94def : ReadAdvance<ReadAfterVecLd, 7>;
95def : ReadAdvance<ReadAfterVecXLd, 7>;
96def : ReadAdvance<ReadAfterVecYLd, 7>;
97
98def : ReadAdvance<ReadInt2Fpu, 0>;
99
100// The Integer PRF for Zen 2 is 180 entries (Zen 1 had 168), and it holds the
101// architectural and speculative version of the 64-bit integer registers.
102// Reference: "Software Optimization Guide for AMD Family 17h Models 30h and
// Greater Processors"
103def Zn2IntegerPRF : RegisterFile<180, [GR64, CCR]>;
104
105// 36 Entry (9x4 entries) floating-point Scheduler
106def Zn2FPU     : ProcResGroup<[Zn2FPU0, Zn2FPU1, Zn2FPU2, Zn2FPU3]> {
107  let BufferSize=36;
108}
109
110// The Zen FP Retire Queue renames SIMD and FP uOps onto a pool of 160 128-bit
111// registers. Operations on 256-bit data types are cracked into two COPs.
112// Reference: "Software Optimization Guide for AMD Family 17h Processors"
// The cost list [1, 1, 2] lines up with the register classes: a VR64 or VR128
// register takes one entry of the pool, a VR256 register takes two.
113def Zn2FpuPRF: RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;
114
115// The unit can track up to 224 macro ops in-flight, matching the reorder-buffer
// size used for MicroOpBufferSize in Znver2Model.
116// The retire unit handles in-order commit of up to 8 macro ops per cycle.
117// Reference: "Software Optimization Guide for AMD Family 17h Models 30h and
// Greater Processors"
118// To be noted, the retire unit is shared between integer and FP ops.
119// In SMT mode each thread only gets half of the entries. But, we do not use
120// the conservative value here because there is currently no way to fully model
121// the SMT mode, so there is no point in trying.
122def Zn2RCU : RetireControlUnit<224, 8>;
123
124// (a folded load is an instruction that loads and does some operation)
125// Ex: ADDPD xmm,[mem]-> This instruction has two micro-ops
126// Instructions with folded loads are usually micro-fused, so they only appear
127// as two micro-ops.
128//      a. load and
129//      b. addpd
130// This multiclass is for folded loads for integer units.
// Defines the scheduling information for a foldable write and for its
// folded-load variant (SchedRW.Folded) on the integer side.
//   SchedRW  - the register-form write; SchedRW.Folded is its load form.
//   ExePorts - resources used by the register form.
//   Lat      - latency of the register form.
//   Res      - ResourceCycles for ExePorts ([] leaves the field empty).
//   UOps     - micro-op count of the register form.
//   LoadLat  - cycles added to Lat for the load form (default 4).
//   LoadUOps - micro-ops added to UOps for the load form (default 1).
131multiclass Zn2WriteResPair<X86FoldableSchedWrite SchedRW,
132                          list<ProcResourceKind> ExePorts,
133                          int Lat, list<int> Res = [], int UOps = 1,
134                          int LoadLat = 4, int LoadUOps = 1> {
135  // Register variant: Lat cycles of latency on the execution ports.
136  def : WriteRes<SchedRW, ExePorts> {
137    let Latency = Lat;
138    let ResourceCycles = Res;
139    let NumMicroOps = UOps;
140  }
141
142  // Memory variant also uses a cycle on Zn2AGU and
143  // adds LoadLat cycles to the latency (default = 4).
144  def : WriteRes<SchedRW.Folded, !listconcat([Zn2AGU], ExePorts)> {
145    let Latency = !add(Lat, LoadLat);
    // When Res is given, a 1 is prepended for Zn2AGU so the list stays aligned
    // with the resource list; an empty Res is passed through as empty.
146    let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
147    let NumMicroOps = !add(UOps, LoadUOps);
148  }
149}
150
151// This multiclass is for folded loads for floating point units.
// Floating-point counterpart of Zn2WriteResPair: defines a foldable write and
// its folded-load variant (SchedRW.Folded). The parameters have the same
// meaning; only the defaults differ: the load form adds 7 cycles of latency
// (LoadLat) and no extra micro-op (LoadUOps = 0).
152multiclass Zn2WriteResFpuPair<X86FoldableSchedWrite SchedRW,
153                          list<ProcResourceKind> ExePorts,
154                          int Lat, list<int> Res = [], int UOps = 1,
155                          int LoadLat = 7, int LoadUOps = 0> {
156  // Register variant: Lat cycles of latency on the execution ports.
157  def : WriteRes<SchedRW, ExePorts> {
158    let Latency = Lat;
159    let ResourceCycles = Res;
160    let NumMicroOps = UOps;
161  }
162
163  // Memory variant also uses a cycle on Zn2AGU and
164  // adds LoadLat cycles to the latency (default = 7).
165  def : WriteRes<SchedRW.Folded, !listconcat([Zn2AGU], ExePorts)> {
166    let Latency = !add(Lat, LoadLat);
    // When Res is given, a 1 is prepended for Zn2AGU so the list stays aligned
    // with the resource list; an empty Res is passed through as empty.
167    let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
168    let NumMicroOps = !add(UOps, LoadUOps);
169  }
170}
171
172// WriteRMW is set for instructions with Memory write
173// operation in codegen
174def : WriteRes<WriteRMW, [Zn2AGU]>;
175
176def : WriteRes<WriteStore,   [Zn2AGU]>;
177def : WriteRes<WriteStoreNT, [Zn2AGU]>;
178def : WriteRes<WriteMove,    [Zn2ALU]>;
// Integer loads use the 4-cycle load-to-use latency assumed by the rest of
// this model (LoadLatency, ReadAdvance<ReadAfterLd, 4>, LoadLat default of
// Zn2WriteResPair).
179def : WriteRes<WriteLoad,    [Zn2AGU]> { let Latency = 4; }
180
181def : WriteRes<WriteZero,  []>;
182def : WriteRes<WriteLEA, [Zn2ALU]>;
183defm : Zn2WriteResPair<WriteALU,   [Zn2ALU], 1>;
184defm : Zn2WriteResPair<WriteADC,   [Zn2ALU], 1>;
185
186defm : Zn2WriteResPair<WriteIMul8,     [Zn2ALU1, Zn2Multiplier], 4>;
187
188defm : X86WriteRes<WriteBSWAP32, [Zn2ALU], 1, [4], 1>;
189defm : X86WriteRes<WriteBSWAP64, [Zn2ALU], 1, [4], 1>;
190defm : X86WriteRes<WriteCMPXCHG, [Zn2ALU], 1, [1], 1>;
191defm : X86WriteRes<WriteCMPXCHGRMW,[Zn2ALU,Zn2AGU], 8, [1,1], 5>;
192defm : X86WriteRes<WriteXCHG, [Zn2ALU], 1, [2], 2>;
193
194defm : Zn2WriteResPair<WriteShift, [Zn2ALU], 1>;
195defm : Zn2WriteResPair<WriteShiftCL,  [Zn2ALU], 1>;
196defm : Zn2WriteResPair<WriteRotate,   [Zn2ALU], 1>;
197defm : Zn2WriteResPair<WriteRotateCL, [Zn2ALU], 1>;
198
199defm : X86WriteRes<WriteSHDrri, [Zn2ALU], 1, [1], 1>;
200defm : X86WriteResUnsupported<WriteSHDrrcl>;
201defm : X86WriteResUnsupported<WriteSHDmri>;
202defm : X86WriteResUnsupported<WriteSHDmrcl>;
203
204defm : Zn2WriteResPair<WriteJump,  [Zn2ALU], 1>;
205defm : Zn2WriteResFpuPair<WriteCRC32, [Zn2FPU0], 3>;
206
207defm : Zn2WriteResPair<WriteCMOV,   [Zn2ALU], 1>;
208def  : WriteRes<WriteSETCC,  [Zn2ALU]>;
209def  : WriteRes<WriteSETCCStore,  [Zn2ALU, Zn2AGU]>;
210defm : X86WriteRes<WriteLAHFSAHF, [Zn2ALU], 2, [1], 2>;
211
212defm : X86WriteRes<WriteBitTest,         [Zn2ALU], 1, [1], 1>;
213defm : X86WriteRes<WriteBitTestImmLd,    [Zn2ALU,Zn2AGU], 5, [1,1], 2>;
214defm : X86WriteRes<WriteBitTestRegLd,    [Zn2ALU,Zn2AGU], 5, [1,1], 2>;
215defm : X86WriteRes<WriteBitTestSet,      [Zn2ALU], 2, [1], 2>;
216
217// Bit counts.
218defm : Zn2WriteResPair<WriteBSF, [Zn2ALU], 3>;
219defm : Zn2WriteResPair<WriteBSR, [Zn2ALU], 3>;
220defm : Zn2WriteResPair<WriteLZCNT,          [Zn2ALU], 1>;
221defm : Zn2WriteResPair<WriteTZCNT,          [Zn2ALU], 2>;
222defm : Zn2WriteResPair<WritePOPCNT,         [Zn2ALU], 1>;
223
224// Treat misc copies as a move.
225def : InstRW<[WriteMove], (instrs COPY)>;
226
227// BMI1 BEXTR, BMI2 BZHI
228defm : Zn2WriteResPair<WriteBEXTR, [Zn2ALU], 1>;
229defm : Zn2WriteResPair<WriteBZHI, [Zn2ALU], 1>;
230
231// IDIV
232defm : Zn2WriteResPair<WriteDiv8,   [Zn2ALU2, Zn2Divider], 15, [1,15], 1>;
233defm : Zn2WriteResPair<WriteDiv16,  [Zn2ALU2, Zn2Divider], 17, [1,17], 2>;
234defm : Zn2WriteResPair<WriteDiv32,  [Zn2ALU2, Zn2Divider], 25, [1,25], 2>;
235defm : Zn2WriteResPair<WriteDiv64,  [Zn2ALU2, Zn2Divider], 41, [1,41], 2>;
236defm : Zn2WriteResPair<WriteIDiv8,  [Zn2ALU2, Zn2Divider], 15, [1,15], 1>;
237defm : Zn2WriteResPair<WriteIDiv16, [Zn2ALU2, Zn2Divider], 17, [1,17], 2>;
238defm : Zn2WriteResPair<WriteIDiv32, [Zn2ALU2, Zn2Divider], 25, [1,25], 2>;
239defm : Zn2WriteResPair<WriteIDiv64, [Zn2ALU2, Zn2Divider], 41, [1,41], 2>;
240
241// IMULH
242def  : WriteRes<WriteIMulH, [Zn2ALU1, Zn2Multiplier]>{
243  let Latency = 4;
244}
245
246// Floating point operations
247defm : X86WriteRes<WriteFLoad,         [Zn2AGU], 8, [1], 1>;
248defm : X86WriteRes<WriteFLoadX,        [Zn2AGU], 8, [1], 1>;
249defm : X86WriteRes<WriteFLoadY,        [Zn2AGU], 8, [1], 1>;
250defm : X86WriteRes<WriteFMaskedLoad,   [Zn2AGU,Zn2FPU01], 8, [1,1], 1>;
251defm : X86WriteRes<WriteFMaskedLoadY,  [Zn2AGU,Zn2FPU01], 8, [1,1], 2>;
252defm : X86WriteRes<WriteFMaskedStore32,  [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
253defm : X86WriteRes<WriteFMaskedStore32Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
254defm : X86WriteRes<WriteFMaskedStore64,  [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
255defm : X86WriteRes<WriteFMaskedStore64Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
256
257defm : X86WriteRes<WriteFStore,        [Zn2AGU], 1, [1], 1>;
258defm : X86WriteRes<WriteFStoreX,       [Zn2AGU], 1, [1], 1>;
259defm : X86WriteRes<WriteFStoreY,       [Zn2AGU], 1, [1], 1>;
260defm : X86WriteRes<WriteFStoreNT,      [Zn2AGU,Zn2FPU2], 8, [1,1], 1>;
261defm : X86WriteRes<WriteFStoreNTX,     [Zn2AGU], 1, [1], 1>;
262defm : X86WriteRes<WriteFStoreNTY,     [Zn2AGU], 1, [1], 1>;
263defm : X86WriteRes<WriteFMove,         [Zn2FPU], 1, [1], 1>;
264defm : X86WriteRes<WriteFMoveX,        [Zn2FPU], 1, [1], 1>;
265defm : X86WriteRes<WriteFMoveY,        [Zn2FPU], 1, [1], 1>;
266
267defm : Zn2WriteResFpuPair<WriteFAdd,      [Zn2FPU0],  3>;
268defm : Zn2WriteResFpuPair<WriteFAddX,     [Zn2FPU0],  3>;
269defm : Zn2WriteResFpuPair<WriteFAddY,     [Zn2FPU0],  3>;
270defm : X86WriteResPairUnsupported<WriteFAddZ>;
271defm : Zn2WriteResFpuPair<WriteFAdd64,    [Zn2FPU0],  3>;
272defm : Zn2WriteResFpuPair<WriteFAdd64X,   [Zn2FPU0],  3>;
273defm : Zn2WriteResFpuPair<WriteFAdd64Y,   [Zn2FPU0],  3>;
274defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
275defm : Zn2WriteResFpuPair<WriteFCmp,      [Zn2FPU0],  3>;
276defm : Zn2WriteResFpuPair<WriteFCmpX,     [Zn2FPU0],  3>;
277defm : Zn2WriteResFpuPair<WriteFCmpY,     [Zn2FPU0],  3>;
278defm : X86WriteResPairUnsupported<WriteFCmpZ>;
279defm : Zn2WriteResFpuPair<WriteFCmp64,    [Zn2FPU0],  3>;
280defm : Zn2WriteResFpuPair<WriteFCmp64X,   [Zn2FPU0],  3>;
281defm : Zn2WriteResFpuPair<WriteFCmp64Y,   [Zn2FPU0],  3>;
282defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
283defm : Zn2WriteResFpuPair<WriteFCom,      [Zn2FPU0],  3>;
284defm : Zn2WriteResFpuPair<WriteFBlend,    [Zn2FPU01], 1>;
285defm : Zn2WriteResFpuPair<WriteFBlendY,   [Zn2FPU01], 1>;
286defm : X86WriteResPairUnsupported<WriteFBlendZ>;
287defm : Zn2WriteResFpuPair<WriteFVarBlend, [Zn2FPU01], 1>;
288defm : Zn2WriteResFpuPair<WriteFVarBlendY,[Zn2FPU01], 1>;
289defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
290defm : Zn2WriteResFpuPair<WriteVarBlend,  [Zn2FPU0],  1>;
291defm : Zn2WriteResFpuPair<WriteVarBlendY, [Zn2FPU0],  1>;
292defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
293defm : Zn2WriteResFpuPair<WriteCvtSS2I,   [Zn2FPU3],  5>;
294defm : Zn2WriteResFpuPair<WriteCvtPS2I,   [Zn2FPU3],  5>;
295defm : Zn2WriteResFpuPair<WriteCvtPS2IY,  [Zn2FPU3],  5>;
296defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
297defm : Zn2WriteResFpuPair<WriteCvtSD2I,   [Zn2FPU3],  5>;
298defm : Zn2WriteResFpuPair<WriteCvtPD2I,   [Zn2FPU3],  5>;
299defm : Zn2WriteResFpuPair<WriteCvtPD2IY,  [Zn2FPU3],  5>;
300defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
301defm : Zn2WriteResFpuPair<WriteCvtI2SS,   [Zn2FPU3],  5>;
302defm : Zn2WriteResFpuPair<WriteCvtI2PS,   [Zn2FPU3],  5>;
303defm : Zn2WriteResFpuPair<WriteCvtI2PSY,  [Zn2FPU3],  5>;
304defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
305defm : Zn2WriteResFpuPair<WriteCvtI2SD,   [Zn2FPU3],  5>;
306defm : Zn2WriteResFpuPair<WriteCvtI2PD,   [Zn2FPU3],  5>;
307defm : Zn2WriteResFpuPair<WriteCvtI2PDY,  [Zn2FPU3],  5>;
308defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
309defm : Zn2WriteResFpuPair<WriteFDiv,      [Zn2FPU3], 15>;
310defm : Zn2WriteResFpuPair<WriteFDivX,     [Zn2FPU3], 15>;
311defm : X86WriteResPairUnsupported<WriteFDivZ>;
312defm : Zn2WriteResFpuPair<WriteFDiv64,    [Zn2FPU3], 15>;
313defm : Zn2WriteResFpuPair<WriteFDiv64X,   [Zn2FPU3], 15>;
314defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
315defm : Zn2WriteResFpuPair<WriteFSign,     [Zn2FPU3],  2>;
316defm : Zn2WriteResFpuPair<WriteFRnd,      [Zn2FPU3],  4, [1], 1, 7, 0>;
317defm : Zn2WriteResFpuPair<WriteFRndY,     [Zn2FPU3],  4, [1], 1, 7, 0>;
318defm : X86WriteResPairUnsupported<WriteFRndZ>;
319defm : Zn2WriteResFpuPair<WriteFLogic,    [Zn2FPU],   1>;
320defm : Zn2WriteResFpuPair<WriteFLogicY,   [Zn2FPU],   1>;
321defm : X86WriteResPairUnsupported<WriteFLogicZ>;
322defm : Zn2WriteResFpuPair<WriteFTest,     [Zn2FPU],   1>;
323defm : Zn2WriteResFpuPair<WriteFTestY,    [Zn2FPU],   1>;
324defm : X86WriteResPairUnsupported<WriteFTestZ>;
325defm : Zn2WriteResFpuPair<WriteFShuffle,  [Zn2FPU12], 1>;
326defm : Zn2WriteResFpuPair<WriteFShuffleY, [Zn2FPU12], 1>;
327defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
328defm : Zn2WriteResFpuPair<WriteFVarShuffle, [Zn2FPU12], 1>;
329defm : Zn2WriteResFpuPair<WriteFVarShuffleY,[Zn2FPU12], 1>;
330defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
331defm : Zn2WriteResFpuPair<WriteFMul,      [Zn2FPU01], 3, [1], 1, 7, 1>;
332defm : Zn2WriteResFpuPair<WriteFMulX,     [Zn2FPU01], 3, [1], 1, 7, 1>;
333defm : Zn2WriteResFpuPair<WriteFMulY,     [Zn2FPU01], 4, [1], 1, 7, 1>;
334defm : X86WriteResPairUnsupported<WriteFMulZ>;
335defm : Zn2WriteResFpuPair<WriteFMul64,    [Zn2FPU01], 3, [1], 1, 7, 1>;
336defm : Zn2WriteResFpuPair<WriteFMul64X,   [Zn2FPU01], 3, [1], 1, 7, 1>;
337defm : Zn2WriteResFpuPair<WriteFMul64Y,   [Zn2FPU01], 4, [1], 1, 7, 1>;
338defm : X86WriteResPairUnsupported<WriteFMul64Z>;
339defm : Zn2WriteResFpuPair<WriteFMA,       [Zn2FPU03], 5>;
340defm : Zn2WriteResFpuPair<WriteFMAX,      [Zn2FPU03], 5>;
341defm : Zn2WriteResFpuPair<WriteFMAY,      [Zn2FPU03], 5>;
342defm : X86WriteResPairUnsupported<WriteFMAZ>;
343defm : Zn2WriteResFpuPair<WriteFRcp,      [Zn2FPU01], 5>;
344defm : Zn2WriteResFpuPair<WriteFRcpX,     [Zn2FPU01], 5>;
345defm : Zn2WriteResFpuPair<WriteFRcpY,     [Zn2FPU01], 5, [1], 1, 7, 2>;
346defm : X86WriteResPairUnsupported<WriteFRcpZ>;
347defm : Zn2WriteResFpuPair<WriteFRsqrtX,   [Zn2FPU01], 5, [1], 1, 7, 1>;
348defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
349defm : Zn2WriteResFpuPair<WriteFSqrt,     [Zn2FPU3], 20, [20]>;
350defm : Zn2WriteResFpuPair<WriteFSqrtX,    [Zn2FPU3], 20, [20]>;
351defm : Zn2WriteResFpuPair<WriteFSqrtY,    [Zn2FPU3], 28, [28], 1, 7, 1>;
352defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
353defm : Zn2WriteResFpuPair<WriteFSqrt64,   [Zn2FPU3], 20, [20]>;
354defm : Zn2WriteResFpuPair<WriteFSqrt64X,  [Zn2FPU3], 20, [20]>;
355defm : Zn2WriteResFpuPair<WriteFSqrt64Y,  [Zn2FPU3], 20, [20], 1, 7, 1>;
356defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
357defm : Zn2WriteResFpuPair<WriteFSqrt80,   [Zn2FPU3], 20, [20]>;
358
359// Vector integer operations which uses FPU units
360defm : X86WriteRes<WriteVecLoad,         [Zn2AGU], 8, [1], 1>;
361defm : X86WriteRes<WriteVecLoadX,        [Zn2AGU], 8, [1], 1>;
362defm : X86WriteRes<WriteVecLoadY,        [Zn2AGU], 8, [1], 1>;
363defm : X86WriteRes<WriteVecLoadNT,       [Zn2AGU], 8, [1], 1>;
364defm : X86WriteRes<WriteVecLoadNTY,      [Zn2AGU], 8, [1], 1>;
365defm : X86WriteRes<WriteVecMaskedLoad,   [Zn2AGU,Zn2FPU01], 8, [1,2], 2>;
366defm : X86WriteRes<WriteVecMaskedLoadY,  [Zn2AGU,Zn2FPU01], 8, [1,2], 2>;
367defm : X86WriteRes<WriteVecStore,        [Zn2AGU], 1, [1], 1>;
368defm : X86WriteRes<WriteVecStoreX,       [Zn2AGU], 1, [1], 1>;
369defm : X86WriteRes<WriteVecStoreY,       [Zn2AGU], 1, [1], 1>;
370defm : X86WriteRes<WriteVecStoreNT,      [Zn2AGU], 1, [1], 1>;
371defm : X86WriteRes<WriteVecStoreNTY,     [Zn2AGU], 1, [1], 1>;
372defm : X86WriteRes<WriteVecMaskedStore,  [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
373defm : X86WriteRes<WriteVecMaskedStoreY, [Zn2AGU,Zn2FPU01], 5, [1,1], 2>;
374defm : X86WriteRes<WriteVecMove,         [Zn2FPU], 1, [1], 1>;
375defm : X86WriteRes<WriteVecMoveX,        [Zn2FPU], 1, [1], 1>;
376defm : X86WriteRes<WriteVecMoveY,        [Zn2FPU], 2, [1], 2>;
377defm : X86WriteRes<WriteVecMoveToGpr,    [Zn2FPU2], 2, [1], 1>;
378defm : X86WriteRes<WriteVecMoveFromGpr,  [Zn2FPU2], 3, [1], 1>;
379defm : X86WriteRes<WriteEMMS,            [Zn2FPU], 2, [1], 1>;
380
381defm : Zn2WriteResFpuPair<WriteVecShift,   [Zn2FPU],   1>;
382defm : Zn2WriteResFpuPair<WriteVecShiftX,  [Zn2FPU2],  1>;
383defm : Zn2WriteResFpuPair<WriteVecShiftY,  [Zn2FPU2],  2>;
384defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
385defm : Zn2WriteResFpuPair<WriteVecShiftImm,  [Zn2FPU], 1>;
386defm : Zn2WriteResFpuPair<WriteVecShiftImmX, [Zn2FPU], 1>;
387defm : Zn2WriteResFpuPair<WriteVecShiftImmY, [Zn2FPU], 1>;
388defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
389defm : Zn2WriteResFpuPair<WriteVecLogic,   [Zn2FPU],   1>;
390defm : Zn2WriteResFpuPair<WriteVecLogicX,  [Zn2FPU],   1>;
391defm : Zn2WriteResFpuPair<WriteVecLogicY,  [Zn2FPU],   1>;
392defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
393defm : Zn2WriteResFpuPair<WriteVecTest,    [Zn2FPU12], 1, [2], 1, 7, 1>;
394defm : Zn2WriteResFpuPair<WriteVecTestY,   [Zn2FPU12], 1, [2], 1, 7, 1>;
395defm : X86WriteResPairUnsupported<WriteVecTestZ>;
396defm : Zn2WriteResFpuPair<WriteVecALU,     [Zn2FPU],   1>;
397defm : Zn2WriteResFpuPair<WriteVecALUX,    [Zn2FPU],   1>;
398defm : Zn2WriteResFpuPair<WriteVecALUY,    [Zn2FPU],   1>;
399defm : X86WriteResPairUnsupported<WriteVecALUZ>;
400defm : Zn2WriteResFpuPair<WriteVecIMul,    [Zn2FPU0],  4>;
401defm : Zn2WriteResFpuPair<WriteVecIMulX,   [Zn2FPU0],  4>;
402defm : Zn2WriteResFpuPair<WriteVecIMulY,   [Zn2FPU0],  4>;
403defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
404defm : Zn2WriteResFpuPair<WritePMULLD,     [Zn2FPU0],  4, [1], 1, 7, 1>;
405defm : Zn2WriteResFpuPair<WritePMULLDY,    [Zn2FPU0],  3, [1], 1, 7, 1>;
406defm : X86WriteResPairUnsupported<WritePMULLDZ>;
407defm : Zn2WriteResFpuPair<WriteShuffle,    [Zn2FPU],   1>;
408defm : Zn2WriteResFpuPair<WriteShuffleX,   [Zn2FPU],   1>;
409defm : Zn2WriteResFpuPair<WriteShuffleY,   [Zn2FPU],   1>;
410defm : X86WriteResPairUnsupported<WriteShuffleZ>;
411defm : Zn2WriteResFpuPair<WriteVarShuffle, [Zn2FPU],   1>;
412defm : Zn2WriteResFpuPair<WriteVarShuffleX,[Zn2FPU],   1>;
413defm : Zn2WriteResFpuPair<WriteVarShuffleY,[Zn2FPU],   1>;
414defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
415defm : Zn2WriteResFpuPair<WriteBlend,      [Zn2FPU01], 1>;
416defm : Zn2WriteResFpuPair<WriteBlendY,     [Zn2FPU01], 1>;
417defm : X86WriteResPairUnsupported<WriteBlendZ>;
418defm : Zn2WriteResFpuPair<WriteShuffle256, [Zn2FPU],   2>;
419defm : Zn2WriteResFpuPair<WriteVarShuffle256, [Zn2FPU],   2>;
420defm : Zn2WriteResFpuPair<WritePSADBW,     [Zn2FPU0],  3>;
421defm : Zn2WriteResFpuPair<WritePSADBWX,    [Zn2FPU0],  3>;
422defm : Zn2WriteResFpuPair<WritePSADBWY,    [Zn2FPU0],  3>;
423defm : X86WriteResPairUnsupported<WritePSADBWZ>;
424defm : Zn2WriteResFpuPair<WritePHMINPOS,   [Zn2FPU0],  4>;
425
426// Vector Shift Operations
427defm : Zn2WriteResFpuPair<WriteVarVecShift,  [Zn2FPU12], 1>;
428defm : Zn2WriteResFpuPair<WriteVarVecShiftY, [Zn2FPU12], 1>;
429defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
430
431// Vector insert/extract operations.
432defm : Zn2WriteResFpuPair<WriteVecInsert,   [Zn2FPU],   1>;
433
434def : WriteRes<WriteVecExtract, [Zn2FPU12, Zn2FPU2]> {
435  let Latency = 2;
436  let ResourceCycles = [1, 2];
437}
438def : WriteRes<WriteVecExtractSt, [Zn2AGU, Zn2FPU12, Zn2FPU2]> {
439  let Latency = 5;
440  let NumMicroOps = 2;
441  let ResourceCycles = [1, 2, 3];
442}
443
444// MOVMSK Instructions.
445def : WriteRes<WriteFMOVMSK, [Zn2FPU2]>;
446def : WriteRes<WriteMMXMOVMSK, [Zn2FPU2]>;
447def : WriteRes<WriteVecMOVMSK, [Zn2FPU2]>;
448
449def : WriteRes<WriteVecMOVMSKY, [Zn2FPU2]> {
450  let NumMicroOps = 2;
451  let Latency = 2;
452  let ResourceCycles = [2];
453}
454
455// AES Instructions.
456defm : Zn2WriteResFpuPair<WriteAESDecEnc, [Zn2FPU01], 4>;
457defm : Zn2WriteResFpuPair<WriteAESIMC,    [Zn2FPU01], 4>;
458defm : Zn2WriteResFpuPair<WriteAESKeyGen, [Zn2FPU01], 4>;
459
460def : WriteRes<WriteFence,  [Zn2AGU]>;
461def : WriteRes<WriteNop, []>;
462
463// Following instructions with latency=100 are microcoded.
464// We set long latency so as to block the entire pipeline.
465defm : Zn2WriteResFpuPair<WriteFShuffle256, [Zn2FPU], 100>;
466defm : Zn2WriteResFpuPair<WriteFVarShuffle256, [Zn2FPU], 100>;
467
468// Microcoded Instructions
469def Zn2WriteMicrocoded : SchedWriteRes<[]> {
470  let Latency = 100;
471}
472
473def : SchedAlias<WriteMicrocoded, Zn2WriteMicrocoded>;
474def : SchedAlias<WriteFCMOV, Zn2WriteMicrocoded>;
475def : SchedAlias<WriteSystem, Zn2WriteMicrocoded>;
476def : SchedAlias<WriteMPSAD, Zn2WriteMicrocoded>;
477def : SchedAlias<WriteMPSADY, Zn2WriteMicrocoded>;
478def : SchedAlias<WriteMPSADLd, Zn2WriteMicrocoded>;
479def : SchedAlias<WriteMPSADYLd, Zn2WriteMicrocoded>;
480def : SchedAlias<WriteCLMul, Zn2WriteMicrocoded>;
481def : SchedAlias<WriteCLMulLd, Zn2WriteMicrocoded>;
482def : SchedAlias<WritePCmpIStrM, Zn2WriteMicrocoded>;
483def : SchedAlias<WritePCmpIStrMLd, Zn2WriteMicrocoded>;
484def : SchedAlias<WritePCmpEStrI, Zn2WriteMicrocoded>;
485def : SchedAlias<WritePCmpEStrILd, Zn2WriteMicrocoded>;
486def : SchedAlias<WritePCmpEStrM, Zn2WriteMicrocoded>;
487def : SchedAlias<WritePCmpEStrMLd, Zn2WriteMicrocoded>;
488def : SchedAlias<WritePCmpIStrI, Zn2WriteMicrocoded>;
489def : SchedAlias<WritePCmpIStrILd, Zn2WriteMicrocoded>;
490def : SchedAlias<WriteLDMXCSR, Zn2WriteMicrocoded>;
491def : SchedAlias<WriteSTMXCSR, Zn2WriteMicrocoded>;
492
493//=== Regex based InstRW ===//
494// Notation:
495// - r: register.
496// - m = memory.
497// - i = immediate
498// - mm: 64 bit mmx register.
499// - x = 128 bit xmm register.
500// - (x)mm = mmx or xmm register.
501// - y = 256 bit ymm register.
502// - v = any vector register.
503
504//=== Integer Instructions ===//
505//-- Move instructions --//
506// MOV.
507// r16,m.
508def : InstRW<[WriteALULd, ReadAfterLd], (instregex "MOV16rm")>;
509
510// MOVSX, MOVZX.
511// r,m.
512def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>;
513
514// XCHG.
515// r,r.
516def Zn2WriteXCHG : SchedWriteRes<[Zn2ALU]> {
517  let NumMicroOps = 2;
518}
519
520def : InstRW<[Zn2WriteXCHG], (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>;
521
522// r,m.
523def Zn2WriteXCHGrm : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
524  let Latency = 5;
525  let NumMicroOps = 2;
526}
527def : InstRW<[Zn2WriteXCHGrm, ReadAfterLd], (instregex "XCHG(8|16|32|64)rm")>;
528
529def : InstRW<[WriteMicrocoded], (instrs XLAT)>;
530
531// POP16.
532// r.
533def Zn2WritePop16r : SchedWriteRes<[Zn2AGU]>{
534  let Latency = 5;
535  let NumMicroOps = 2;
536}
537def : InstRW<[Zn2WritePop16r], (instregex "POP16rmm")>;
538def : InstRW<[WriteMicrocoded], (instregex "POPF(16|32)")>;
539def : InstRW<[WriteMicrocoded], (instregex "POPA(16|32)")>;
540
541
542// PUSH.
543// r. Has default values.
544// m.
545def Zn2WritePUSH : SchedWriteRes<[Zn2AGU]>{
546  let Latency = 4;
547}
548def : InstRW<[Zn2WritePUSH], (instregex "PUSH(16|32)rmm")>;
549
550//PUSHF
551def : InstRW<[WriteMicrocoded], (instregex "PUSHF(16|32)")>;
552
553// PUSHA.
554def Zn2WritePushA : SchedWriteRes<[Zn2AGU]> {
555  let Latency = 8;
556}
557def : InstRW<[Zn2WritePushA], (instregex "PUSHA(16|32)")>;
558
559//LAHF
560def : InstRW<[WriteMicrocoded], (instrs LAHF)>;
561
562// MOVBE.
563// r,m.
564def Zn2WriteMOVBE : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
565  let Latency = 5;
566}
567def : InstRW<[Zn2WriteMOVBE, ReadAfterLd], (instregex "MOVBE(16|32|64)rm")>;
568
569// m,r.
570def : InstRW<[Zn2WriteMOVBE], (instregex "MOVBE(16|32|64)mr")>;
571
572//-- Arithmetic instructions --//
573
574// ADD SUB.
575// m,r/i.
576def : InstRW<[WriteALULd], (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
577                          "(ADD|SUB)(8|16|32|64)mi8",
578                          "(ADD|SUB)64mi32")>;
579
580// ADC SBB.
581// m,r/i.
582def : InstRW<[WriteALULd],
583             (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
584              "(ADC|SBB)(16|32|64)mi8",
585              "(ADC|SBB)64mi32")>;
586
587// INC DEC NOT NEG.
588// m.
589def : InstRW<[WriteALULd],
590             (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m")>;
591
592// MUL IMUL.
593// r16.
// Integer multiply writes for 16/32/64-bit operands. Each width gets a
// register-form write on Zn2ALU1 + Zn2Multiplier and a load-form write that
// additionally uses Zn2AGU and has 4 more cycles of latency. The generic
// WriteIMul* writes (plain, Imm and Reg flavours) are aliased onto them.
594def Zn2WriteMul16 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
595  let Latency = 3;
596}
597def : SchedAlias<WriteIMul16, Zn2WriteMul16>;
598def : SchedAlias<WriteIMul16Imm, Zn2WriteMul16>;
599def : SchedAlias<WriteIMul16Reg, Zn2WriteMul16>;
600
601// m16.
602def Zn2WriteMul16Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
603  let Latency = 7;
604}
605def : SchedAlias<WriteIMul16Ld, Zn2WriteMul16Ld>;
606def : SchedAlias<WriteIMul16ImmLd, Zn2WriteMul16Ld>;
607def : SchedAlias<WriteIMul16RegLd, Zn2WriteMul16Ld>;
608
609// r32.
610def Zn2WriteMul32 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
611  let Latency = 3;
612}
613def : SchedAlias<WriteIMul32, Zn2WriteMul32>;
614def : SchedAlias<WriteIMul32Imm, Zn2WriteMul32>;
615def : SchedAlias<WriteIMul32Reg, Zn2WriteMul32>;
616
617// m32.
618def Zn2WriteMul32Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
619  let Latency = 7;
620}
621def : SchedAlias<WriteIMul32Ld, Zn2WriteMul32Ld>;
622def : SchedAlias<WriteIMul32ImmLd, Zn2WriteMul32Ld>;
623def : SchedAlias<WriteIMul32RegLd, Zn2WriteMul32Ld>;
624
625// r64.
// The 64-bit forms are the only ones modelled as two micro-ops.
626def Zn2WriteMul64 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
627  let Latency = 4;
628  let NumMicroOps = 2;
629}
630def : SchedAlias<WriteIMul64, Zn2WriteMul64>;
631def : SchedAlias<WriteIMul64Imm, Zn2WriteMul64>;
632def : SchedAlias<WriteIMul64Reg, Zn2WriteMul64>;
633
634// m64.
635def Zn2WriteMul64Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
636  let Latency = 8;
637  let NumMicroOps = 2;
638}
639def : SchedAlias<WriteIMul64Ld, Zn2WriteMul64Ld>;
640def : SchedAlias<WriteIMul64ImmLd, Zn2WriteMul64Ld>;
641def : SchedAlias<WriteIMul64RegLd, Zn2WriteMul64Ld>;
642
643// MULX.
644// r32,r32,r32.
// MULX writes, bound to the individual MULX32/MULX64 rr and rm instructions.
// The load forms add Zn2AGU and 4 cycles of latency, and pair the write with
// ReadAfterLd.
645def Zn2WriteMulX32 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
646  let Latency = 3;
  // One cycle on Zn2ALU1, two on Zn2Multiplier.
647  let ResourceCycles = [1, 2];
648}
649def : InstRW<[Zn2WriteMulX32], (instrs MULX32rr)>;
650
651// r32,r32,m32.
652def Zn2WriteMulX32Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
653  let Latency = 7;
  // One cycle on Zn2AGU, two each on Zn2ALU1 and Zn2Multiplier.
654  let ResourceCycles = [1, 2, 2];
655}
656def : InstRW<[Zn2WriteMulX32Ld, ReadAfterLd], (instrs MULX32rm)>;
657
658// r64,r64,r64.
// NOTE(review): unlike the other multiply writes, this one does not list
// Zn2Multiplier as a resource - confirm whether that is intentional.
659def Zn2WriteMulX64 : SchedWriteRes<[Zn2ALU1]> {
660  let Latency = 3;
661}
662def : InstRW<[Zn2WriteMulX64], (instrs MULX64rr)>;
663
664// r64,r64,m64.
665def Zn2WriteMulX64Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
666  let Latency = 7;
667}
668def : InstRW<[Zn2WriteMulX64Ld, ReadAfterLd], (instrs MULX64rm)>;
669
670//-- Control transfer instructions --//
671
672// J(E|R)CXZ.
673def Zn2WriteJCXZ : SchedWriteRes<[Zn2ALU03]>;
674def : InstRW<[Zn2WriteJCXZ], (instrs JCXZ, JECXZ, JRCXZ)>;
675
676// INTO
677def : InstRW<[WriteMicrocoded], (instrs INTO)>;
678
679// LOOP.
680def Zn2WriteLOOP : SchedWriteRes<[Zn2ALU03]>;
681def : InstRW<[Zn2WriteLOOP], (instrs LOOP)>;
682
683// LOOP(N)E, LOOP(N)Z
684def Zn2WriteLOOPE : SchedWriteRes<[Zn2ALU03]>;
685def : InstRW<[Zn2WriteLOOPE], (instrs LOOPE, LOOPNE)>;
686
687// CALL.
688// r.
689def Zn2WriteCALLr : SchedWriteRes<[Zn2AGU, Zn2ALU03]>;
690def : InstRW<[Zn2WriteCALLr], (instregex "CALL(16|32)r")>;
691
692def : InstRW<[WriteMicrocoded], (instregex "CALL(16|32)m")>;
693
694// RET.
695def Zn2WriteRET : SchedWriteRes<[Zn2ALU03]> {
696  let NumMicroOps = 2;
697}
698def : InstRW<[Zn2WriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)",
699                            "IRET(16|32|64)")>;
700
701//-- Logic instructions --//
702
703// AND OR XOR.
704// m,r/i.
705def : InstRW<[WriteALULd],
706             (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
707              "(AND|OR|XOR)(8|16|32|64)mi8", "(AND|OR|XOR)64mi32")>;
708
709// Define ALU latency variants
// Zn2WriteALULat2 is a 2-cycle write on any ALU; Zn2WriteALULat2Ld is its
// load form, which also uses an AGU and has 4 more cycles of latency. They
// are used further down as the aliases for WriteBLS and WriteBLSLd.
710def Zn2WriteALULat2 : SchedWriteRes<[Zn2ALU]> {
711  let Latency = 2;
712}
713def Zn2WriteALULat2Ld : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
714  let Latency = 6;
715}
716
717// BT.
718// m,i.
719def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>;
720
721// BTR BTS BTC.
722// r,r,i.
723def Zn2WriteBTRSC : SchedWriteRes<[Zn2ALU]> {
724  let Latency = 2;
725  let NumMicroOps = 2;
726}
727def : InstRW<[Zn2WriteBTRSC], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;
728
729// m,r,i.
730def Zn2WriteBTRSCm : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
731  let Latency = 6;
732  let NumMicroOps = 2;
733}
734// m,r,i.
735def : SchedAlias<WriteBitTestSetImmRMW, Zn2WriteBTRSCm>;
736def : SchedAlias<WriteBitTestSetRegRMW, Zn2WriteBTRSCm>;
737
738// BLSI BLSMSK BLSR.
739// r,r.
740def : SchedAlias<WriteBLS, Zn2WriteALULat2>;
741// r,m.
742def : SchedAlias<WriteBLSLd, Zn2WriteALULat2Ld>;
743
744// CLD STD.
745def : InstRW<[WriteALU], (instrs STD, CLD)>;
746
747// PDEP PEXT.
748// r,r,r.
749def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
750// r,r,m.
751def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;
752
753// RCR RCL.
754// m,1/i/cl.
755def : InstRW<[WriteMicrocoded], (instregex "RC(R|L)(8|16|32|64)m(1|i|CL)")>;
756
757// SHR SHL SAR.
758// m,i.
759def : InstRW<[WriteShiftLd], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;
760
761// SHRD SHLD.
762// m,r
763def : InstRW<[WriteShiftLd], (instregex "SH(R|L)D(16|32|64)mri8")>;
764
765// r,r,cl.
766def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)rrCL")>;
767
768// m,r,cl.
769def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)mrCL")>;
770
771//-- Misc instructions --//
772// CMPXCHG8B.
773def Zn2WriteCMPXCHG8B : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
774  let NumMicroOps = 18;
775}
776def : InstRW<[Zn2WriteCMPXCHG8B], (instrs CMPXCHG8B)>;
777
778def : InstRW<[WriteMicrocoded], (instrs CMPXCHG16B)>;
779
780// LEAVE
781def Zn2WriteLEAVE : SchedWriteRes<[Zn2ALU, Zn2AGU]> {
782  let Latency = 8;
783  let NumMicroOps = 2;
784}
785def : InstRW<[Zn2WriteLEAVE], (instregex "LEAVE")>;
786
787// PAUSE.
788def : InstRW<[WriteMicrocoded], (instrs PAUSE)>;
789
790// RDTSC.
791def : InstRW<[WriteMicrocoded], (instregex "RDTSC")>;
792
793// RDPMC.
794def : InstRW<[WriteMicrocoded], (instrs RDPMC)>;
795
796// RDRAND.
797def : InstRW<[WriteMicrocoded], (instregex "RDRAND(16|32|64)r")>;
798
799// XGETBV.
800def : InstRW<[WriteMicrocoded], (instregex "XGETBV")>;
801
802//-- String instructions --//
803// CMPS.
804def : InstRW<[WriteMicrocoded], (instregex "CMPS(B|L|Q|W)")>;
805
806// LODSB/W.
807def : InstRW<[WriteMicrocoded], (instregex "LODS(B|W)")>;
808
809// LODSD/Q.
810def : InstRW<[WriteMicrocoded], (instregex "LODS(L|Q)")>;
811
812// MOVS.
813def : InstRW<[WriteMicrocoded], (instregex "MOVS(B|L|Q|W)")>;
814
815// SCAS.
816def : InstRW<[WriteMicrocoded], (instregex "SCAS(B|W|L|Q)")>;
817
818// STOS
819def : InstRW<[WriteMicrocoded], (instregex "STOS(B|L|Q|W)")>;
820
821// XADD: the register form is a single Zn2ALU write with no overrides; the memory form is microcoded.
822def Zn2XADD : SchedWriteRes<[Zn2ALU]>;
823def : InstRW<[Zn2XADD], (instregex "XADD(8|16|32|64)rr")>;  // r,r.
824def : InstRW<[WriteMicrocoded], (instregex "XADD(8|16|32|64)rm")>;  // m,r.
825
826//=== Floating Point x87 Instructions ===//
827//-- Move instructions --//
828
829def Zn2WriteFLDr : SchedWriteRes<[Zn2FPU13]> ;
830
831def Zn2WriteSTr: SchedWriteRes<[Zn2FPU23]> {
832  let Latency = 5;
833  let NumMicroOps = 2;
834}
835
836// LD_F.
837// r.
838def : InstRW<[Zn2WriteFLDr], (instregex "LD_Frr")>;
839
840// m.
841def Zn2WriteLD_F80m : SchedWriteRes<[Zn2AGU, Zn2FPU13]> {
842  let NumMicroOps = 2;
843}
844def : InstRW<[Zn2WriteLD_F80m], (instregex "LD_F80m")>;
845
846// FBLD.
847def : InstRW<[WriteMicrocoded], (instregex "FBLDm")>;
848
849// FST(P).
850// r.
851def : InstRW<[Zn2WriteSTr], (instregex "ST_(F|FP)rr")>;
852
853// m80.
854def Zn2WriteST_FP80m : SchedWriteRes<[Zn2AGU, Zn2FPU23]> {
855  let Latency = 5;
856}
857def : InstRW<[Zn2WriteST_FP80m], (instregex "ST_FP80m")>;
858
859// FBSTP.
860// m80.
861def : InstRW<[WriteMicrocoded], (instregex "FBSTPm")>;
862
863def Zn2WriteFXCH : SchedWriteRes<[Zn2FPU]>;
864
865// FXCH.
866def : InstRW<[Zn2WriteFXCH], (instrs XCH_F)>;
867
868// FILD.
869def Zn2WriteFILD : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
870  let Latency = 11;
871  let NumMicroOps = 2;
872}
873def : InstRW<[Zn2WriteFILD], (instregex "ILD_F(16|32|64)m")>;
874
875// FIST(P) FISTTP.
876def Zn2WriteFIST : SchedWriteRes<[Zn2AGU, Zn2FPU23]> {
877  let Latency = 12;
878}
879def : InstRW<[Zn2WriteFIST], (instregex "IS(T|TT)_(F|FP)(16|32|64)m")>;
880
881def Zn2WriteFPU13 : SchedWriteRes<[Zn2AGU, Zn2FPU13]> {
882  let Latency = 8;
883}
884
885def Zn2WriteFPU3 : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
886  let Latency = 11;
887}
888
889// FLDZ.
890def : SchedAlias<WriteFLD0, Zn2WriteFPU13>;
891
892// FLD1.
893def : SchedAlias<WriteFLD1, Zn2WriteFPU3>;
894
895// FLDPI FLDL2E etc.
896def : SchedAlias<WriteFLDC, Zn2WriteFPU3>;
897
898// FNSTSW.
899// AX.
900def : InstRW<[WriteMicrocoded], (instrs FNSTSW16r)>;
901
902// m16.
903def : InstRW<[WriteMicrocoded], (instrs FNSTSWm)>;
904
905// FLDCW.
906def : InstRW<[WriteMicrocoded], (instrs FLDCW16m)>;
907
908// FNSTCW.
909def : InstRW<[WriteMicrocoded], (instrs FNSTCW16m)>;
910
911// FINCSTP FDECSTP.
912def : InstRW<[Zn2WriteFPU3], (instrs FINCSTP, FDECSTP)>;
913
914// FFREE.
915def : InstRW<[Zn2WriteFPU3], (instregex "FFREE")>;
916
917// FNSAVE.
918def : InstRW<[WriteMicrocoded], (instregex "FSAVEm")>;
919
920// FRSTOR.
921def : InstRW<[WriteMicrocoded], (instregex "FRSTORm")>;
922
923//-- Arithmetic instructions --//
924
925def Zn2WriteFPU3Lat1 : SchedWriteRes<[Zn2FPU3]> ;
926
927def Zn2WriteFPU0Lat1 : SchedWriteRes<[Zn2FPU0]> ;
928
929def Zn2WriteFPU0Lat1Ld : SchedWriteRes<[Zn2AGU, Zn2FPU0]> {
930  let Latency = 8;
931}
932
933// FCHS.
934def : InstRW<[Zn2WriteFPU3Lat1], (instregex "CHS_F")>;
935
936// FCOM(P) FUCOM(P).
937// r.
938def : InstRW<[Zn2WriteFPU0Lat1], (instregex "COM(P?)_FST0r", "UCOM_F(P?)r")>;
939// m.
940def : InstRW<[Zn2WriteFPU0Lat1Ld], (instregex "FCOM(P?)(32|64)m")>;
941
942// FCOMPP FUCOMPP.
943// r.
944def : InstRW<[Zn2WriteFPU0Lat1], (instrs FCOMPP, UCOM_FPPr)>;
945
946def Zn2WriteFPU02 : SchedWriteRes<[Zn2AGU, Zn2FPU02]>
947{
948  let Latency = 9;
949}
950
951// FCOMI(P) FUCOMI(P).
952// r. NOTE(review): all four opcodes below carry the register-form 'r' suffix, yet Zn2WriteFPU02 lists Zn2AGU - confirm.
953def : InstRW<[Zn2WriteFPU02], (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>;
954
955def Zn2WriteFPU03 : SchedWriteRes<[Zn2AGU, Zn2FPU03]>
956{
957  let Latency = 12;
958  let NumMicroOps = 2;
959  let ResourceCycles = [1,3];  // positional: Zn2AGU for 1 cycle, Zn2FPU03 for 3
960}
961
962// FICOM(P): bound to the 16- and 32-bit memory-operand opcodes.
963def : InstRW<[Zn2WriteFPU03], (instregex "FICOM(P?)(16|32)m")>;
964
965// FTST.
966def : InstRW<[Zn2WriteFPU0Lat1], (instregex "TST_F")>;
967
968// FXAM.
969def : InstRW<[Zn2WriteFPU3Lat1], (instrs FXAM)>;
970
971// FPREM.
972def : InstRW<[WriteMicrocoded], (instrs FPREM)>;
973
974// FPREM1.
975def : InstRW<[WriteMicrocoded], (instrs FPREM1)>;
976
977// FRNDINT.
978def : InstRW<[WriteMicrocoded], (instrs FRNDINT)>;
979
980// FSCALE.
981def : InstRW<[WriteMicrocoded], (instrs FSCALE)>;
982
983// FXTRACT.
984def : InstRW<[WriteMicrocoded], (instrs FXTRACT)>;
985
986// FNOP.
987def : InstRW<[Zn2WriteFPU0Lat1], (instrs FNOP)>;
988
989// WAIT.
990def : InstRW<[Zn2WriteFPU0Lat1], (instrs WAIT)>;
991
992// FNCLEX.
993def : InstRW<[WriteMicrocoded], (instrs FNCLEX)>;
994
995// FNINIT.
996def : InstRW<[WriteMicrocoded], (instrs FNINIT)>;
997
998//=== Integer MMX and XMM Instructions ===//
999
1000// PACKSSWB/DW.
1001// mm <- mm.
1002def Zn2WriteFPU12 : SchedWriteRes<[Zn2FPU12]> ;
1003def Zn2WriteFPU12Y : SchedWriteRes<[Zn2FPU12]> {
1004  let NumMicroOps = 2;
1005}
1006def Zn2WriteFPU12m : SchedWriteRes<[Zn2AGU, Zn2FPU12]> ;
1007def Zn2WriteFPU12Ym : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
1008  let Latency = 8;
1009  let NumMicroOps = 2;
1010}
1011
1012def : InstRW<[Zn2WriteFPU12], (instrs MMX_PACKSSDWirr,
1013                                     MMX_PACKSSWBirr,
1014                                     MMX_PACKUSWBirr)>;
1015def : InstRW<[Zn2WriteFPU12m], (instrs MMX_PACKSSDWirm,
1016                                      MMX_PACKSSWBirm,
1017                                      MMX_PACKUSWBirm)>;
1018
1019// VPMOVSX/ZX BW BD BQ WD WQ DQ.
1020// y <- x.
1021def : InstRW<[Zn2WriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrr")>;
1022def : InstRW<[Zn2WriteFPU12Ym], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrm")>;
1023
1024def Zn2WriteFPU013 : SchedWriteRes<[Zn2FPU013]> ;
1025def Zn2WriteFPU013Y : SchedWriteRes<[Zn2FPU013]> ;
1026def Zn2WriteFPU013m : SchedWriteRes<[Zn2AGU, Zn2FPU013]> {
1027  let Latency = 8;
1028  let NumMicroOps = 2;
1029}
1030def Zn2WriteFPU013Ld : SchedWriteRes<[Zn2AGU, Zn2FPU013]> {
1031  let Latency = 8;
1032  let NumMicroOps = 2;
1033}
1034def Zn2WriteFPU013LdY : SchedWriteRes<[Zn2AGU, Zn2FPU013]> {
1035  let Latency = 8;
1036  let NumMicroOps = 2;
1037}
1038
1039// PBLENDW.
1040// x,x,i / v,v,v,i
1041def : InstRW<[Zn2WriteFPU013], (instregex "(V?)PBLENDWrri")>;
1042// ymm
1043def : InstRW<[Zn2WriteFPU013Y], (instrs VPBLENDWYrri)>;
1044
1045// x,m,i / v,v,m,i
1046def : InstRW<[Zn2WriteFPU013Ld], (instregex "(V?)PBLENDWrmi")>;
1047// y,m,i
1048def : InstRW<[Zn2WriteFPU013LdY], (instrs VPBLENDWYrmi)>;
1049
1050def Zn2WriteFPU01 : SchedWriteRes<[Zn2FPU01]> ;
1051def Zn2WriteFPU01Y : SchedWriteRes<[Zn2FPU01]> {
1052  let NumMicroOps = 2;
1053}
1054
1055// VPBLENDD.
1056// v,v,v,i.
1057def : InstRW<[Zn2WriteFPU01], (instrs VPBLENDDrri)>;
1058// ymm
1059def : InstRW<[Zn2WriteFPU01Y], (instrs VPBLENDDYrri)>;
1060
1061// v,v,m,i: VPBLENDD load forms; the xmm and ymm classes differ only in latency and FPU occupancy.
1062def Zn2WriteFPU01Op2 : SchedWriteRes<[Zn2AGU, Zn2FPU01]> {
1063  let NumMicroOps = 2;
1064  let Latency = 8;
1065  let ResourceCycles = [1, 2];  // Zn2AGU for 1 cycle, Zn2FPU01 for 2
1066}
1067def Zn2WriteFPU01Op2Y : SchedWriteRes<[Zn2AGU, Zn2FPU01]> {
1068  let NumMicroOps = 2;
1069  let Latency = 9;
1070  let ResourceCycles = [1, 3];  // Zn2AGU for 1 cycle, Zn2FPU01 for 3
1071}
1072def : InstRW<[Zn2WriteFPU01Op2], (instrs VPBLENDDrmi)>;
1073def : InstRW<[Zn2WriteFPU01Op2Y], (instrs VPBLENDDYrmi)>;
1074
1075// MASKMOVQ.
1076def : InstRW<[WriteMicrocoded], (instregex "MMX_MASKMOVQ(64)?")>;
1077
1078// MASKMOVDQU.
1079def : InstRW<[WriteMicrocoded], (instregex "(V?)MASKMOVDQU(64)?")>;
1080
1081// VPMASKMOVD.
1082// v,v,m (xmm and ymm).
1083def : InstRW<[WriteMicrocoded],
1084                               (instregex "VPMASKMOVD(Y?)rm")>;
1085// m, v,v.
1086def : InstRW<[WriteMicrocoded], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
1087
1088// VPBROADCAST B/W, load forms. The 128- and 256-bit classes differ only in the FPU resource they name.
1089// x, m8/16: may use the Zn2FPU12 group.
1090def Zn2WriteVPBROADCAST128Ld : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
1091  let Latency = 8;
1092  let NumMicroOps = 2;
1093  let ResourceCycles = [1, 2];  // Zn2AGU for 1 cycle, Zn2FPU12 for 2
1094}
1095def : InstRW<[Zn2WriteVPBROADCAST128Ld],
1096                                     (instregex "VPBROADCAST(B|W)rm")>;
1097
1098// y, m8/16: restricted to Zn2FPU1.
1099def Zn2WriteVPBROADCAST256Ld : SchedWriteRes<[Zn2AGU, Zn2FPU1]> {
1100  let Latency = 8;
1101  let NumMicroOps = 2;
1102  let ResourceCycles = [1, 2];  // Zn2AGU for 1 cycle, Zn2FPU1 for 2
1103}
1104def : InstRW<[Zn2WriteVPBROADCAST256Ld],
1105                                     (instregex "VPBROADCAST(B|W)Yrm")>;
1106
1107// VPGATHER.
1108def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;
1109
1110//-- Arithmetic instructions --//
1111
1112// Integer horizontal add/sub; the FP HADD, HSUB PS/PD forms are aliased via WriteFHAdd*.
1113// PHADD|PHSUB (S) W/D.
1114def : SchedAlias<WritePHAdd,    Zn2WriteMicrocoded>;
1115def : SchedAlias<WritePHAddLd,  Zn2WriteMicrocoded>;
1116def : SchedAlias<WritePHAddX,   Zn2WriteMicrocoded>;
1117def : SchedAlias<WritePHAddXLd, Zn2WriteMicrocoded>;
1118def : SchedAlias<WritePHAddY,   Zn2WriteMicrocoded>;
1119def : SchedAlias<WritePHAddYLd, Zn2WriteMicrocoded>;
1120
1121// PCMPGTQ.
1122def Zn2WritePCMPGTQr : SchedWriteRes<[Zn2FPU03]>;
1123def : InstRW<[Zn2WritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;
1124
1125// x <- x,m.
1126def Zn2WritePCMPGTQm : SchedWriteRes<[Zn2AGU, Zn2FPU03]> {
1127  let Latency = 8;
1128}
1129// ymm.
1130def Zn2WritePCMPGTQYm : SchedWriteRes<[Zn2AGU, Zn2FPU03]> {
1131  let Latency = 8;
1132}
1133def : InstRW<[Zn2WritePCMPGTQm], (instregex "(V?)PCMPGTQrm")>;
1134def : InstRW<[Zn2WritePCMPGTQYm], (instrs VPCMPGTQYrm)>;
1135
1136//-- Logic instructions --//
1137
1138// PSLL,PSRL,PSRA W/D/Q.
1139// x,x / v,v,x. The xmm and ymm classes are identical: one Zn2FPU2 use with no overrides.
1140def Zn2WritePShift  : SchedWriteRes<[Zn2FPU2]> ;
1141def Zn2WritePShiftY : SchedWriteRes<[Zn2FPU2]> ;
1142
1143// PSLL,PSRL DQ (immediate byte shifts): the only opcodes bound to the two classes here.
1144def : InstRW<[Zn2WritePShift], (instregex "(V?)PS(R|L)LDQri")>;
1145def : InstRW<[Zn2WritePShiftY], (instregex "(V?)PS(R|L)LDQYri")>;
1146
1147//=== Floating Point XMM and YMM Instructions ===//
1148//-- Move instructions --//
1149
1150// VPERM2F128.
1151def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rr)>;
1152def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rm)>;
1153
1154def Zn2WriteBROADCAST : SchedWriteRes<[Zn2AGU, Zn2FPU13]> {
1155  let NumMicroOps = 2;
1156  let Latency = 8;
1157}
1158// VBROADCASTF128.
1159def : InstRW<[Zn2WriteBROADCAST], (instrs VBROADCASTF128)>;
1160
1161// EXTRACTPS.
1162// r32,x,i.
1163def Zn2WriteEXTRACTPSr : SchedWriteRes<[Zn2FPU12, Zn2FPU2]> {
1164  let Latency = 2;
1165  let ResourceCycles = [1, 2];  // Zn2FPU12 for 1 cycle, Zn2FPU2 for 2
1166}
1167def : InstRW<[Zn2WriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;
1168
1169def Zn2WriteEXTRACTPSm : SchedWriteRes<[Zn2AGU,Zn2FPU12, Zn2FPU2]> {
1170  let Latency = 5;
1171  let NumMicroOps = 2;
1172  let ResourceCycles = [5, 1, 2];  // Zn2AGU x5, Zn2FPU12 x1, Zn2FPU2 x2. NOTE(review): nearby memory forms charge the AGU 1 cycle - confirm 5.
1173}
1174// m32,x,i.
1175def : InstRW<[Zn2WriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;
1176
1177// VEXTRACTF128.
1178// x,y,i.
1179def : InstRW<[Zn2WriteFPU013], (instrs VEXTRACTF128rr)>;
1180
1181// m128,y,i.
1182def : InstRW<[Zn2WriteFPU013m], (instrs VEXTRACTF128mr)>;
1183
1184def Zn2WriteVINSERT128r: SchedWriteRes<[Zn2FPU013]> {
1185  let Latency = 2;
1186//  let ResourceCycles = [2];  (disabled override; Zn2FPU013 keeps its default occupancy)
1187}
1188def Zn2WriteVINSERT128Ld: SchedWriteRes<[Zn2AGU,Zn2FPU013]> {
1189  let Latency = 9;
1190  let NumMicroOps = 2;
1191}
1192// VINSERTF128.
1193// y,y,x,i (rr) and y,y,m,i (rm).
1194def : InstRW<[Zn2WriteVINSERT128r], (instrs VINSERTF128rr)>;
1195def : InstRW<[Zn2WriteVINSERT128Ld], (instrs VINSERTF128rm)>;
1196
1197// VGATHER.
1198def : InstRW<[WriteMicrocoded], (instregex "VGATHER(Q|D)(PD|PS)(Y?)rm")>;
1199
1200//-- Conversion instructions --//
1201def Zn2WriteCVTPD2PSr: SchedWriteRes<[Zn2FPU3]> {
1202  let Latency = 3;
1203}
1204def Zn2WriteCVTPD2PSYr: SchedWriteRes<[Zn2FPU3]> {
1205  let Latency = 3;
1206}
1207
1208// CVTPD2PS.
1209// x,x.
1210def : SchedAlias<WriteCvtPD2PS,  Zn2WriteCVTPD2PSr>;
1211// y,y.
1212def : SchedAlias<WriteCvtPD2PSY, Zn2WriteCVTPD2PSYr>;
1213// z,z.
1214defm : X86WriteResUnsupported<WriteCvtPD2PSZ>;
1215
1216def Zn2WriteCVTPD2PSLd: SchedWriteRes<[Zn2AGU,Zn2FPU03]> {
1217  let Latency = 10;
1218  let NumMicroOps = 2;
1219}
1220// x,m128. NOTE(review): uses Zn2FPU03, while Zn2WriteCVTPD2PSr and the m256 class below use Zn2FPU3 - confirm.
1221def : SchedAlias<WriteCvtPD2PSLd, Zn2WriteCVTPD2PSLd>;
1222
1223// x,m256. Unlike the m128 class, NumMicroOps is not overridden here.
1224def Zn2WriteCVTPD2PSYLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
1225  let Latency = 10;
1226}
1227def : SchedAlias<WriteCvtPD2PSYLd, Zn2WriteCVTPD2PSYLd>;
1228// z,m512: declared through X86WriteResUnsupported rather than given resources.
1229defm : X86WriteResUnsupported<WriteCvtPD2PSZLd>;
1230
1231// CVTSD2SS.
1232// x,x.
1233// Same as Zn2WriteCVTPD2PSr.
1234def : SchedAlias<WriteCvtSD2SS, Zn2WriteCVTPD2PSr>;
1235
1236// x,m64.
1237def : SchedAlias<WriteCvtSD2SSLd, Zn2WriteCVTPD2PSLd>;
1238
1239// CVTPS2PD.
1240// x,x.
1241def Zn2WriteCVTPS2PDr : SchedWriteRes<[Zn2FPU3]> {
1242  let Latency = 3;
1243}
1244def : SchedAlias<WriteCvtPS2PD, Zn2WriteCVTPS2PDr>;
1245
1246// x,m64.
1247// y,m128.
1248def Zn2WriteCVTPS2PDLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
1249  let Latency = 10;
1250  let NumMicroOps = 2;
1251}
1252def : SchedAlias<WriteCvtPS2PDLd, Zn2WriteCVTPS2PDLd>;
1253def : SchedAlias<WriteCvtPS2PDYLd, Zn2WriteCVTPS2PDLd>;
1254defm : X86WriteResUnsupported<WriteCvtPS2PDZLd>;
1255
1256// y,x.
1257def Zn2WriteVCVTPS2PDY : SchedWriteRes<[Zn2FPU3]> {
1258  let Latency = 3;
1259}
1260def : SchedAlias<WriteCvtPS2PDY, Zn2WriteVCVTPS2PDY>;
1261defm : X86WriteResUnsupported<WriteCvtPS2PDZ>;
1262
1263// CVTSS2SD.
1264// x,x.
1265def Zn2WriteCVTSS2SDr : SchedWriteRes<[Zn2FPU3]> {
1266  let Latency = 3;
1267}
1268def : SchedAlias<WriteCvtSS2SD, Zn2WriteCVTSS2SDr>;
1269
1270// x,m32.
1271def Zn2WriteCVTSS2SDLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
1272  let Latency = 10;
1273  let NumMicroOps = 2;
1274  let ResourceCycles = [1, 2];
1275}
1276def : SchedAlias<WriteCvtSS2SDLd, Zn2WriteCVTSS2SDLd>;
1277
1278def Zn2WriteCVTDQ2PDr: SchedWriteRes<[Zn2FPU12,Zn2FPU3]> {
1279  let Latency = 3;
1280}
1281// CVTDQ2PD.
1282// x,x.
1283def : InstRW<[Zn2WriteCVTDQ2PDr], (instregex "(V)?CVTDQ2PDrr")>;
1284
1285// The ymm forms reuse the xmm write class.
1286// y,x (VCVTDQ2PD); the ymm VCVTDQ2PS register form is bound to the same class here.
1287def : InstRW<[Zn2WriteCVTDQ2PDr], (instrs VCVTDQ2PDYrr)>;
1288def : InstRW<[Zn2WriteCVTDQ2PDr], (instrs VCVTDQ2PSYrr)>;
1289
1290def Zn2WriteCVTPD2DQr: SchedWriteRes<[Zn2FPU12, Zn2FPU3]> {
1291  let Latency = 3;
1292}
1293// CVT(T)PD2DQ.
1294// x,x.
1295def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "(V?)CVT(T?)PD2DQrr")>;
1296
1297def Zn2WriteCVTPD2DQLd: SchedWriteRes<[Zn2AGU,Zn2FPU12,Zn2FPU3]> {
1298  let Latency = 10;
1299  let NumMicroOps = 2;
1300}
1301// x,m128.
1302def : InstRW<[Zn2WriteCVTPD2DQLd], (instregex "(V?)CVT(T?)PD2DQrm")>;
1303// same as xmm handling
1304// x,y.
1305def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "VCVT(T?)PD2DQYrr")>;
1306// x,m256.
1307def : InstRW<[Zn2WriteCVTPD2DQLd], (instregex "VCVT(T?)PD2DQYrm")>;
1308
1309def Zn2WriteCVTPS2PIr: SchedWriteRes<[Zn2FPU3]> {
1310  let Latency = 4;
1311}
1312// CVT(T)PS2PI.
1313// mm,x.
1314def : InstRW<[Zn2WriteCVTPS2PIr], (instregex "MMX_CVT(T?)PS2PIirr")>;
1315
1316// CVTPI2PD.
1317// x,mm.
1318def : InstRW<[Zn2WriteCVTPS2PDr], (instrs MMX_CVTPI2PDirr)>;
1319
1320// CVT(T)PD2PI.
1321// mm,x.
1322def : InstRW<[Zn2WriteCVTPS2PIr], (instregex "MMX_CVT(T?)PD2PIirr")>;
1323
1324def Zn2WriteCVSTSI2SSr: SchedWriteRes<[Zn2FPU3]> {  // NOTE(review): no InstRW/SchedAlias below refers to this class - possibly unused; confirm.
1325  let Latency = 4;
1326}
1327
1328// same as CVTPD2DQr
1329// CVT(T)SS2SI.
1330// r32,x.
1331def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "(V?)CVT(T?)SS2SI(64)?rr")>;
1332// same as CVTPD2DQLd
1333// r32,m32.
1334def : InstRW<[Zn2WriteCVTPD2DQLd], (instregex "(V?)CVT(T?)SS2SI(64)?rm")>;
1335
1336def Zn2WriteCVSTSI2SDr: SchedWriteRes<[Zn2FPU013, Zn2FPU3]> {
1337  let Latency = 4;
1338}
1339// CVTSI2SD.
1340// x,r32/64.
1341def : InstRW<[Zn2WriteCVSTSI2SDr], (instregex "(V?)CVTSI(64)?2SDrr")>;
1342
1343
1344def Zn2WriteCVSTSI2SIr: SchedWriteRes<[Zn2FPU3, Zn2FPU2]> {
1345  let Latency = 4;
1346}
1347def Zn2WriteCVSTSI2SILd: SchedWriteRes<[Zn2AGU, Zn2FPU3, Zn2FPU2]> {
1348  let Latency = 11;
1349}
1350// CVT(T)SD2SI. The class names read "SI2SI", but they are bound to the SD2SI opcodes below.
1351// r32/64,x.
1352def : InstRW<[Zn2WriteCVSTSI2SIr], (instregex "(V?)CVT(T?)SD2SI(64)?rr")>;
1353// r32/64,m64.
1354def : InstRW<[Zn2WriteCVSTSI2SILd], (instregex "(V?)CVT(T?)SD2SI(64)?rm")>;
1355
1356// VCVTPS2PH.
1357// x,v,i.
1358def : SchedAlias<WriteCvtPS2PH,    Zn2WriteMicrocoded>;
1359def : SchedAlias<WriteCvtPS2PHY,   Zn2WriteMicrocoded>;
1360defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
1361// m,v,i.
1362def : SchedAlias<WriteCvtPS2PHSt,  Zn2WriteMicrocoded>;
1363def : SchedAlias<WriteCvtPS2PHYSt, Zn2WriteMicrocoded>;
1364defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
1365
1366// VCVTPH2PS.
1367// v,x.
1368def : SchedAlias<WriteCvtPH2PS,    Zn2WriteMicrocoded>;
1369def : SchedAlias<WriteCvtPH2PSY,   Zn2WriteMicrocoded>;
1370defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
1371// v,m.
1372def : SchedAlias<WriteCvtPH2PSLd,  Zn2WriteMicrocoded>;
1373def : SchedAlias<WriteCvtPH2PSYLd, Zn2WriteMicrocoded>;
1374defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
1375
1376//-- SSE4A instructions --//
1377// EXTRQ
1378def Zn2WriteEXTRQ: SchedWriteRes<[Zn2FPU12, Zn2FPU2]> {
1379  let Latency = 2;
1380}
1381def : InstRW<[Zn2WriteEXTRQ], (instregex "EXTRQ")>;
1382
1383// INSERTQ
1384def Zn2WriteINSERTQ: SchedWriteRes<[Zn2FPU03,Zn2FPU1]> {
1385  let Latency = 4;
1386}
1387def : InstRW<[Zn2WriteINSERTQ], (instregex "INSERTQ")>;
1388
1389//-- SHA instructions --//
1390// SHA256MSG2
1391def : InstRW<[WriteMicrocoded], (instregex "SHA256MSG2(Y?)r(r|m)")>;
1392
1393// SHA1MSG1, SHA256MSG1: one write class serves both through the SHA(1|256) alternation.
1394// x,x.
1395def Zn2WriteSHA1MSG1r : SchedWriteRes<[Zn2FPU12]> {
1396  let Latency = 2;
1397}
1398def : InstRW<[Zn2WriteSHA1MSG1r], (instregex "SHA(1|256)MSG1rr")>;
1399// x,m: same FPU group plus Zn2AGU; latency is the register form's plus 7.
1400def Zn2WriteSHA1MSG1Ld : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
1401  let Latency = 9;
1402}
1403def : InstRW<[Zn2WriteSHA1MSG1Ld], (instregex "SHA(1|256)MSG1rm")>;
1404
1405// SHA1MSG2
1406// x,x.
1407def Zn2WriteSHA1MSG2r : SchedWriteRes<[Zn2FPU12]> ;
1408def : InstRW<[Zn2WriteSHA1MSG2r], (instregex "SHA1MSG2rr")>;
1409// x,m.
1410def Zn2WriteSHA1MSG2Ld : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
1411  let Latency = 8;
1412}
1413def : InstRW<[Zn2WriteSHA1MSG2Ld], (instregex "SHA1MSG2rm")>;
1414
1415// SHA1NEXTE
1416// x,x.
1417def Zn2WriteSHA1NEXTEr : SchedWriteRes<[Zn2FPU1]> ;
1418def : InstRW<[Zn2WriteSHA1NEXTEr], (instregex "SHA1NEXTErr")>;
1419// x,m.
1420def Zn2WriteSHA1NEXTELd : SchedWriteRes<[Zn2AGU, Zn2FPU1]> {
1421  let Latency = 8;
1422}
1423def : InstRW<[Zn2WriteSHA1NEXTELd], (instregex "SHA1NEXTErm")>;
1424
1425// SHA1RNDS4
1426// x,x.
1427def Zn2WriteSHA1RNDS4r : SchedWriteRes<[Zn2FPU1]> {
1428  let Latency = 6;
1429}
1430def : InstRW<[Zn2WriteSHA1RNDS4r], (instregex "SHA1RNDS4rr")>;
1431// x,m.
1432def Zn2WriteSHA1RNDS4Ld : SchedWriteRes<[Zn2AGU, Zn2FPU1]> {
1433  let Latency = 13;
1434}
1435def : InstRW<[Zn2WriteSHA1RNDS4Ld], (instregex "SHA1RNDS4rm")>;
1436
1437// SHA256RNDS2
1438// x,x.
1439def Zn2WriteSHA256RNDS2r : SchedWriteRes<[Zn2FPU1]> {
1440  let Latency = 4;
1441}
1442def : InstRW<[Zn2WriteSHA256RNDS2r], (instregex "SHA256RNDS2rr")>;
1443// x,m.
1444def Zn2WriteSHA256RNDS2Ld : SchedWriteRes<[Zn2AGU, Zn2FPU1]> {
1445  let Latency = 11;
1446}
1447def : InstRW<[Zn2WriteSHA256RNDS2Ld], (instregex "SHA256RNDS2rm")>;
1448
1449//-- Arithmetic instructions --//
1450
1451// HADD, HSUB PS/PD
1452def : SchedAlias<WriteFHAdd,    Zn2WriteMicrocoded>;
1453def : SchedAlias<WriteFHAddLd,  Zn2WriteMicrocoded>;
1454def : SchedAlias<WriteFHAddY,   Zn2WriteMicrocoded>;
1455def : SchedAlias<WriteFHAddYLd, Zn2WriteMicrocoded>;
1456
1457// VDIVPS (ymm forms, aliased onto WriteFDivY / WriteFDivYLd).
1458// TODO - convert to Zn2WriteResFpuPair
1459// y,y,y.
1460def Zn2WriteVDIVPSYr : SchedWriteRes<[Zn2FPU3]> {
1461  let Latency = 10;
1462  let ResourceCycles = [10];  // Zn2FPU3 is charged for as many cycles as the latency
1463}
1464def : SchedAlias<WriteFDivY,   Zn2WriteVDIVPSYr>;
1465
1466// y,y,m256.
1467def Zn2WriteVDIVPSYLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
1468  let Latency = 17;
1469  let NumMicroOps = 2;
1470  let ResourceCycles = [1, 17];  // Zn2AGU x1, Zn2FPU3 x17. NOTE(review): 7 more FPU cycles than the register form - confirm.
1471}
1472def : SchedAlias<WriteFDivYLd,  Zn2WriteVDIVPSYLd>;
1473
1474// VDIVPD (ymm forms, aliased onto WriteFDiv64Y / WriteFDiv64YLd).
1475// TODO - convert to Zn2WriteResFpuPair
1476// y,y,y.
1477def Zn2WriteVDIVPDY : SchedWriteRes<[Zn2FPU3]> {
1478  let Latency = 13;
1479  let ResourceCycles = [13];  // Zn2FPU3 is charged for as many cycles as the latency
1480}
1481def : SchedAlias<WriteFDiv64Y, Zn2WriteVDIVPDY>;
1482
1483// y,y,m256.
1484def Zn2WriteVDIVPDYLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
1485  let Latency = 20;
1486  let NumMicroOps = 2;
1487  let ResourceCycles = [1,20];  // Zn2AGU x1, Zn2FPU3 x20. NOTE(review): 7 more FPU cycles than the register form - confirm.
1488}
1489def : SchedAlias<WriteFDiv64YLd, Zn2WriteVDIVPDYLd>;
1490
1491// DPPS.
1492// x,x,i / v,v,v,i.
1493def : SchedAlias<WriteDPPS,   Zn2WriteMicrocoded>;
1494def : SchedAlias<WriteDPPSY,  Zn2WriteMicrocoded>;
1495
1496// x,m,i / v,v,m,i.
1497def : SchedAlias<WriteDPPSLd, Zn2WriteMicrocoded>;
1498def : SchedAlias<WriteDPPSYLd,Zn2WriteMicrocoded>;
1499
1500// DPPD.
1501// x,x,i.
1502def : SchedAlias<WriteDPPD,   Zn2WriteMicrocoded>;
1503
1504// x,m,i.
1505def : SchedAlias<WriteDPPDLd, Zn2WriteMicrocoded>;
1506
1507// RSQRTSS (aliased onto WriteFRsqrt / WriteFRsqrtLd rather than bound to opcodes).
1508// TODO - convert to Zn2WriteResFpuPair
1509// x,x.
1510def Zn2WriteRSQRTSSr : SchedWriteRes<[Zn2FPU02]> {
1511  let Latency = 5;
1512}
1513def : SchedAlias<WriteFRsqrt, Zn2WriteRSQRTSSr>;
1514
1515// x,m128.
1516def Zn2WriteRSQRTSSLd: SchedWriteRes<[Zn2AGU, Zn2FPU02]> {
1517  let Latency = 12;
1518  let NumMicroOps = 2;
1519  let ResourceCycles = [1,2];  // Zn2AGU for 1 cycle, Zn2FPU02 for 2
1520}
1521def : SchedAlias<WriteFRsqrtLd, Zn2WriteRSQRTSSLd>;
1522
1523// RSQRTPS (ymm forms, aliased onto WriteFRsqrtY / WriteFRsqrtYLd). Uses Zn2FPU01, not the Zn2FPU02 of RSQRTSS.
1524// TODO - convert to Zn2WriteResFpuPair
1525// y,y.
1526def Zn2WriteRSQRTPSYr : SchedWriteRes<[Zn2FPU01]> {
1527  let Latency = 5;
1528  let NumMicroOps = 2;
1529  let ResourceCycles = [2];  // Zn2FPU01 for 2 cycles
1530}
1531def : SchedAlias<WriteFRsqrtY, Zn2WriteRSQRTPSYr>;
1532
1533// y,m256. NOTE(review): no ResourceCycles override and the same NumMicroOps as the register form - confirm.
1534def Zn2WriteRSQRTPSYLd : SchedWriteRes<[Zn2AGU, Zn2FPU01]> {
1535  let Latency = 12;
1536  let NumMicroOps = 2;
1537}
1538def : SchedAlias<WriteFRsqrtYLd, Zn2WriteRSQRTPSYLd>;
1539
1540//-- Other instructions --//
1541
1542// VZEROUPPER.
1543def : InstRW<[WriteALU], (instrs VZEROUPPER)>;
1544
1545// VZEROALL.
1546def : InstRW<[WriteMicrocoded], (instrs VZEROALL)>;
1547
1548} // SchedModel
1549