//=- X86ScheduleZnver3.td - X86 Znver3 Scheduling ------------*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Znver3 to support instruction
// scheduling and other instruction cost heuristics.
// Based on:
//  * AMD Software Optimization Guide for AMD Family 19h Processors.
//    https://www.amd.com/system/files/TechDocs/56665.zip
//  * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog
//    http://www.agner.org/optimize/microarchitecture.pdf
//  * AMD Zen 3 Ryzen Deep Dive Review
//    https://www.anandtech.com/show/16214/
//===----------------------------------------------------------------------===//

// Top-level machine model record for Znver3. Besides the standard
// SchedMachineModel fields (set with `let`), it declares two model-local
// integer fields (`VecLoadLatency`, `StoreLatency`) that the rest of this file
// reads as `Znver3Model.<field>`.
def Znver3Model : SchedMachineModel {
  // AMD SOG 19h, 2.9.6 Dispatch
  // The processor may dispatch up to 6 macro ops per cycle
  // into the execution engine.
  let IssueWidth = 6;
  // AMD SOG 19h, 2.10.3
  // The retire control unit (RCU) tracks the completion status of all
  // outstanding operations (integer, load/store, and floating-point) and is
  // the final arbiter for exception processing and recovery.
  // The unit can receive up to 6 macro ops dispatched per cycle and track up
  // to 256 macro ops in-flight in non-SMT mode or 128 per thread in SMT mode.
  let MicroOpBufferSize = 256;
  // AMD SOG 19h, 2.9.1 Op Cache
  // The op cache is organized as an associative cache with 64 sets and 8 ways.
  // At each set-way intersection is an entry containing up to 8 macro ops.
  // The maximum capacity of the op cache is 4K ops.
  // Agner, 22.5 µop cache
  // The size of the µop cache is big enough for holding most critical loops.
  // FIXME: PR50584: MachineScheduler/PostRAScheduler have quadratic complexity,
  //        with large values here the compilation of certain loops
  //        ends up taking way too long.
  // let LoopMicroOpBufferSize = 4096;
  let LoopMicroOpBufferSize = 512;
  // AMD SOG 19h, 2.6.2 L1 Data Cache
  // The L1 data cache has a 4- or 5- cycle integer load-to-use latency.
  // AMD SOG 19h, 2.12 Load-Store Unit
  // The AGU and LS pipelines are optimized for simple address generation modes.
  // <...> and can achieve 4-cycle load-to-use integer load latency.
  let LoadLatency = 4;
  // AMD SOG 19h, 2.12 Load-Store Unit
  // The AGU and LS pipelines are optimized for simple address generation modes.
  // <...> and can achieve <...> 7-cycle load-to-use FP load latency.
  // Model-local field (declared here, not inherited).
  int VecLoadLatency = 7;
  // Latency of a simple store operation.
  // Model-local field (declared here, not inherited).
  int StoreLatency = 1;
  // FIXME: placeholder value; any better choice?
  let HighLatency = 25;
  // AMD SOG 19h, 2.8 Optimizing Branching
  // The branch misprediction penalty is in the range from 11 to 18 cycles,
  // <...>. The common case penalty is 13 cycles.
  let MispredictPenalty = 13;

  let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.

  let CompleteModel = 1;
}

6773471bf0Spatricklet SchedModel = Znver3Model in {
6873471bf0Spatrick
6973471bf0Spatrick
//===----------------------------------------------------------------------===//
// RCU
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.10.3 Retire Control Unit
// The unit can receive up to 6 macro ops dispatched per cycle and track up to
// 256 macro ops in-flight in non-SMT mode or 128 per thread in SMT mode. <...>
// The retire unit handles in-order commit of up to eight macro ops per cycle.
//
// The first argument reuses the model's MicroOpBufferSize (256) so the two
// values cannot drift apart; the second argument is the 8 macro ops per cycle
// quoted above.
def Zn3RCU : RetireControlUnit<Znver3Model.MicroOpBufferSize, 8>;

//===----------------------------------------------------------------------===//
// Units
//===----------------------------------------------------------------------===//

// There are a total of three units, each one with its own schedulers.

//===----------------------------------------------------------------------===//
// Integer Execution Unit
//

// AMD SOG 19h, 2.4 Superscalar Organization
// The processor uses four decoupled independent integer scheduler queues,
// each one servicing one ALU pipeline and one or two other pipelines.

//
// Execution pipes
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.10.2 Execution Units
// The processor contains 4 general purpose integer execution pipes.
// Each pipe has an ALU capable of general purpose integer operations.
// Each pipe is modeled as its own single-unit resource.
def Zn3ALU0 : ProcResource<1>;
def Zn3ALU1 : ProcResource<1>;
def Zn3ALU2 : ProcResource<1>;
def Zn3ALU3 : ProcResource<1>;

// AMD SOG 19h, 2.10.2 Execution Units
// There is also a separate branch execution unit.
def Zn3BRU1 : ProcResource<1>;

// AMD SOG 19h, 2.10.2 Execution Units
// There are three Address Generation Units (AGUs) for all load and store
// address generation. There are also 3 store data movement units
// associated with the same schedulers as the AGUs.
// Only the three AGUs get resources here.
def Zn3AGU0 : ProcResource<1>;
def Zn3AGU1 : ProcResource<1>;
def Zn3AGU2 : ProcResource<1>;

//
// Execution Units
//===----------------------------------------------------------------------===//

// The names below are `defvar` aliases for existing pipe resources; they do
// not introduce additional resources.

// AMD SOG 19h, 2.10.2 Execution Units
// ALU0 additionally has divide <...> execution capability.
defvar Zn3Divider = Zn3ALU0;

// AMD SOG 19h, 2.10.2 Execution Units
// ALU0 additionally has <...> branch execution capability.
defvar Zn3BRU0 = Zn3ALU0;

// Integer Multiplication issued on ALU1.
defvar Zn3Multiplier = Zn3ALU1;

// Execution pipeline grouping
//===----------------------------------------------------------------------===//

// General ALU operations
def Zn3ALU0123 : ProcResGroup<[Zn3ALU0, Zn3ALU1, Zn3ALU2, Zn3ALU3]>;

// General AGU operations
def Zn3AGU012 : ProcResGroup<[Zn3AGU0, Zn3AGU1, Zn3AGU2]>;

// Control flow: jumps, calls
// Zn3BRU0 is an alias of Zn3ALU0, so this group is {Zn3ALU0, Zn3BRU1}.
def Zn3BRU01 : ProcResGroup<[Zn3BRU0, Zn3BRU1]>;

// Everything that isn't control flow, but still needs to access CC register,
// namely: conditional moves, SETcc.
def Zn3ALU03 : ProcResGroup<[Zn3ALU0, Zn3ALU3]>;

// Zn3ALU1 handles complex bit twiddling: CRC/PDEP/PEXT
// (no group is needed for a single pipe; Zn3ALU1 is used directly).

// Simple bit twiddling: bit test, shift/rotate, bit extraction
def Zn3ALU12 : ProcResGroup<[Zn3ALU1, Zn3ALU2]>;


//
// Scheduling
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.10.3 Retire Control Unit
// The integer physical register file (PRF) consists of 192 registers.
// The register-class list, the cost list and the following 0/1 list are
// parallel: GR64 -> cost 1, flag 1; CCR -> cost 1, flag 0.
def Zn3IntegerPRF : RegisterFile<192, [GR64, CCR], [1, 1], [1, 0],
                              6,  // Max moves that can be eliminated per cycle.
                              0>; // Restrict move elimination to zero regs.

// anandtech, The integer scheduler has a 4*24 entry macro op capacity.
// AMD SOG 19h, 2.10.1 Schedulers
// The schedulers can receive up to six macro ops per cycle, with a limit of
// two per scheduler. Each scheduler can issue one micro op per cycle into
// each of its associated pipelines.
// FIXME: these are 4 separate schedulers, not a single big one.
// Until that is fixed, all integer pipes share one group whose buffer is the
// sum of the four 24-entry queues.
def Zn3Int : ProcResGroup<[Zn3ALU0, Zn3AGU0, Zn3BRU0, // scheduler 0
                           Zn3ALU1, Zn3AGU1,          // scheduler 1
                           Zn3ALU2, Zn3AGU2,          // scheduler 2
                           Zn3ALU3,          Zn3BRU1  // scheduler 3
                          ]> {
  let BufferSize = !mul(4, 24);
}


//===----------------------------------------------------------------------===//
// Floating-Point Unit
//

// AMD SOG 19h, 2.4 Superscalar Organization
// The processor uses <...> two decoupled independent floating point schedulers
// each servicing two FP pipelines and one store or FP-to-integer pipeline.

//
// Execution pipes
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.10.1 Schedulers
// <...>, and six FPU pipes.
// Agner, 22.10 Floating point execution pipes
// There are six floating point/vector execution pipes,
// Pipes 0-3 are individual single-unit resources; pipes 4 and 5 are modeled
// together as one resource with two units.
def Zn3FPP0  : ProcResource<1>;
def Zn3FPP1  : ProcResource<1>;
def Zn3FPP2  : ProcResource<1>;
def Zn3FPP3  : ProcResource<1>;
def Zn3FPP45 : ProcResource<2>;

//
// Execution Units
//===----------------------------------------------------------------------===//
// AMD SOG 19h, 2.11.1 Floating Point Execution Resources
//
// Every name in this section is a `defvar` alias that maps a functional unit
// onto one of the Zn3FPP* pipe resources; several units share a pipe.

// (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ)
defvar Zn3FPFMul0 = Zn3FPP0;
defvar Zn3FPFMul1 = Zn3FPP1;

// (v)FADD*
defvar Zn3FPFAdd0 = Zn3FPP2;
defvar Zn3FPFAdd1 = Zn3FPP3;

// All convert operations except pack/unpack
defvar Zn3FPFCvt0 = Zn3FPP2;
defvar Zn3FPFCvt1 = Zn3FPP3;

// All Divide and Square Root except Reciprocal Approximation
// AMD SOG 19h, 2.11.1 Floating Point Execution Resources
// FDIV unit can support 2 simultaneous operations in flight
// even though it occupies a single pipe.
// FIXME: BufferSize=2 ?
defvar Zn3FPFDiv = Zn3FPP1;

// Moves and Logical operations on Floating Point Data Types
defvar Zn3FPFMisc0 = Zn3FPP0;
defvar Zn3FPFMisc1 = Zn3FPP1;
defvar Zn3FPFMisc2 = Zn3FPP2;
defvar Zn3FPFMisc3 = Zn3FPP3;

// Integer Adds, Subtracts, and Compares
// Some complex VADD operations are not available in all pipes.
defvar Zn3FPVAdd0 = Zn3FPP0;
defvar Zn3FPVAdd1 = Zn3FPP1;
defvar Zn3FPVAdd2 = Zn3FPP2;
defvar Zn3FPVAdd3 = Zn3FPP3;

// Integer Multiplies, SAD, Blendvb
// Note: these map to pipes 0 and 3 (not 0 and 1).
defvar Zn3FPVMul0 = Zn3FPP0;
defvar Zn3FPVMul1 = Zn3FPP3;

// Data Shuffles, Packs, Unpacks, Permute
// Some complex shuffle operations are only available in pipe1.
defvar Zn3FPVShuf = Zn3FPP1;
defvar Zn3FPVShufAux = Zn3FPP2;

// Bit Shift Left/Right operations
defvar Zn3FPVShift0 = Zn3FPP1;
defvar Zn3FPVShift1 = Zn3FPP2;

// Moves and Logical operations on Packed Integer Data Types
defvar Zn3FPVMisc0 = Zn3FPP0;
defvar Zn3FPVMisc1 = Zn3FPP1;
defvar Zn3FPVMisc2 = Zn3FPP2;
defvar Zn3FPVMisc3 = Zn3FPP3;

// *AES*
defvar Zn3FPAES0 = Zn3FPP0;
defvar Zn3FPAES1 = Zn3FPP1;

// *CLM*
defvar Zn3FPCLM0 = Zn3FPP0;
defvar Zn3FPCLM1 = Zn3FPP1;

// Execution pipeline grouping
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.11 Floating-Point Unit
// Stores and floating point to general purpose register transfer
// have 2 dedicated pipelines (pipe 5 and 6).
// Consequently this "any FP pipe" group only contains pipes 0-3.
def Zn3FPU0123 : ProcResGroup<[Zn3FPP0, Zn3FPP1, Zn3FPP2, Zn3FPP3]>;

// (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ)
def Zn3FPFMul01 : ProcResGroup<[Zn3FPFMul0, Zn3FPFMul1]>;

// (v)FADD*
// Some complex VADD operations are not available in all pipes.
def Zn3FPFAdd01 : ProcResGroup<[Zn3FPFAdd0, Zn3FPFAdd1]>;

// All convert operations except pack/unpack
def Zn3FPFCvt01 : ProcResGroup<[Zn3FPFCvt0, Zn3FPFCvt1]>;

// All Divide and Square Root except Reciprocal Approximation
// (single pipe, so no group is defined; Zn3FPFDiv is used directly)
// def Zn3FPFDiv : ProcResGroup<[Zn3FPFDiv]>;

// Moves and Logical operations on Floating Point Data Types
def Zn3FPFMisc0123 : ProcResGroup<[Zn3FPFMisc0, Zn3FPFMisc1, Zn3FPFMisc2, Zn3FPFMisc3]>;

// Subset of the above restricted to pipes 1 and 2.
def Zn3FPFMisc12 : ProcResGroup<[Zn3FPFMisc1, Zn3FPFMisc2]>;

// Loads, Stores and Move to General Register (EX) Operations
// AMD SOG 19h, 2.11 Floating-Point Unit
// Stores and floating point to general purpose register transfer
// have 2 dedicated pipelines (pipe 5 and 6).
// These are the two units of Zn3FPP45 in this model.
defvar Zn3FPLd01 = Zn3FPP45;

// AMD SOG 19h, 2.11 Floating-Point Unit
// Note that FP stores are supported on two pipelines,
// but throughput is limited to one per cycle.
// Hence a single-unit resource that is a sub-resource (`Super`) of Zn3FPP45.
let Super = Zn3FPP45 in
def Zn3FPSt : ProcResource<1>;

// Integer Adds, Subtracts, and Compares
// Some complex VADD operations are not available in all pipes.
def Zn3FPVAdd0123 : ProcResGroup<[Zn3FPVAdd0, Zn3FPVAdd1, Zn3FPVAdd2, Zn3FPVAdd3]>;

// Two-pipe subsets of the group above.
def Zn3FPVAdd01: ProcResGroup<[Zn3FPVAdd0, Zn3FPVAdd1]>;
def Zn3FPVAdd12: ProcResGroup<[Zn3FPVAdd1, Zn3FPVAdd2]>;

// Integer Multiplies, SAD, Blendvb
def Zn3FPVMul01 : ProcResGroup<[Zn3FPVMul0, Zn3FPVMul1]>;

// Data Shuffles, Packs, Unpacks, Permute
// Some complex shuffle operations are only available in pipe1.
def Zn3FPVShuf01 : ProcResGroup<[Zn3FPVShuf, Zn3FPVShufAux]>;

// Bit Shift Left/Right operations
def Zn3FPVShift01 : ProcResGroup<[Zn3FPVShift0, Zn3FPVShift1]>;

// Moves and Logical operations on Packed Integer Data Types
def Zn3FPVMisc0123 : ProcResGroup<[Zn3FPVMisc0, Zn3FPVMisc1, Zn3FPVMisc2, Zn3FPVMisc3]>;

// *AES*
def Zn3FPAES01 : ProcResGroup<[Zn3FPAES0, Zn3FPAES1]>;

// *CLM*
def Zn3FPCLM01 : ProcResGroup<[Zn3FPCLM0, Zn3FPCLM1]>;


//
// Scheduling
//===----------------------------------------------------------------------===//

// Agner, 21.8 Register renaming and out-of-order schedulers
// The floating point register file has 160 vector registers
// of 128 bits each in Zen 1 and 256 bits each in Zen 2.
// anandtech also confirms this.
// The register-class list, the cost list and the following 0/1 list are
// parallel: VR64 -> cost 1, flag 0; VR128 -> cost 1, flag 1;
// VR256 -> cost 1, flag 1.
def Zn3FpPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 1], [0, 1, 1],
                            6,  // Max moves that can be eliminated per cycle.
                            0>; // Restrict move elimination to zero regs.

// AMD SOG 19h, 2.11 Floating-Point Unit
// The floating-point scheduler has a 2*32 entry macro op capacity.
// AMD SOG 19h, 2.11 Floating-Point Unit
// <...> the scheduler can issue 1 micro op per cycle for each pipe.
// FIXME: those are two separate schedulers, not a single big one.
// Pipes 4 and 5 appear once, as the two-unit Zn3FPP45 resource; the
// commented-out names show where each would sit if modeled separately.
def Zn3FP : ProcResGroup<[Zn3FPP0, Zn3FPP2,          /*Zn3FPP4,*/ // scheduler 0
                          Zn3FPP1, Zn3FPP3, Zn3FPP45 /*Zn3FPP5*/  // scheduler 1
                         ]> {
  let BufferSize = !mul(2, 32);
}

// AMD SOG 19h, 2.11 Floating-Point Unit
// Macro ops can be dispatched to the 64 entry Non Scheduling Queue (NSQ)
// even if floating-point scheduler is full.
// FIXME: how to model this properly?


//===----------------------------------------------------------------------===//
// Load-Store Unit
//

// AMD SOG 19h, 2.12 Load-Store Unit
// The LS unit contains three largely independent pipe-lines
// enabling the execution of three 256-bit memory operations per cycle.
// Modeled as one resource with three units; Zn3Load and Zn3Store are declared
// as its sub-resources.
def Zn3LSU : ProcResource<3>;

// AMD SOG 19h, 2.12 Load-Store Unit
// All three memory operations can be loads.
// Three units, declared as a sub-resource (`Super`) of Zn3LSU.
let Super = Zn3LSU in
def Zn3Load : ProcResource<3> {
  // AMD SOG 19h, 2.12 Load-Store Unit
  // The LS unit can process up to 72 out-of-order loads.
  let BufferSize = 72;
}

// Load queue backed by the Zn3Load resource.
def Zn3LoadQueue : LoadQueue<Zn3Load>;

// AMD SOG 19h, 2.12 Load-Store Unit
// A maximum of two of the memory operations can be stores.
// Two units, declared as a sub-resource (`Super`) of Zn3LSU.
let Super = Zn3LSU in
def Zn3Store : ProcResource<2> {
  // AMD SOG 19h, 2.12 Load-Store Unit
  // The LS unit utilizes a 64-entry store queue (STQ).
  let BufferSize = 64;
}

// Store queue backed by the Zn3Store resource.
def Zn3StoreQueue : StoreQueue<Zn3Store>;

//===----------------------------------------------------------------------===//
// Basic helper classes.
//===----------------------------------------------------------------------===//

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.

// Lowest-level helper: emits a single anonymous WriteRes for `SchedRW`.
//   SchedRW  - the scheduling write being described.
//   ExePorts - resources the write consumes.
//   Lat      - value assigned to Latency (default 1).
//   Res      - value assigned to ResourceCycles, parallel to ExePorts
//              (default: empty list).
//   UOps     - value assigned to NumMicroOps (default 1).
multiclass __zn3WriteRes<SchedWrite SchedRW, list<ProcResourceKind> ExePorts,
                         int Lat = 1, list<int> Res = [], int UOps = 1> {
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ResourceCycles = Res;
    let NumMicroOps = UOps;
  }
}

// Emits two WriteRes records for a foldable write: one for the register form
// (`SchedRW`) and one for the folded-load form (`SchedRW.Folded`).
//
// The folded form is derived from the register form as follows:
//   * resources:  [AGU, Zn3Load] is prepended to ExePorts;
//   * latency:    Lat + LoadLat;
//   * micro-ops:  UOps + LoadUOps;
//   * cycles:     if `Res` is empty and LoadRes == 1 the list stays empty;
//                 otherwise it is [1, LoadRes] (for AGU and Zn3Load) followed
//                 by `Res`, or by one `1` per entry of ExePorts when `Res`
//                 is empty, so the list stays parallel to the resource list.
multiclass __zn3WriteResPair<X86FoldableSchedWrite SchedRW,
                             list<ProcResourceKind> ExePorts, int Lat,
                             list<int> Res, int UOps, int LoadLat, int LoadUOps,
                             ProcResourceKind AGU, int LoadRes> {
  // Register (non-folded) form.
  defm : __zn3WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;

  // Folded-load form.
  defm : __zn3WriteRes<SchedRW.Folded,
                       !listconcat([AGU, Zn3Load], ExePorts),
                       !add(Lat, LoadLat),
                       !if(!and(!empty(Res), !eq(LoadRes, 1)),
                         [],
                         !listconcat([1, LoadRes],
                           !if(!empty(Res),
                             !listsplat(1, !size(ExePorts)),
                             Res))),
                       !add(UOps, LoadUOps)>;
}

// For classes without folded loads.
// The Int/XMM/YMM variants below are identical thin wrappers: each forwards
// all of its arguments unchanged to __zn3WriteRes.
multiclass Zn3WriteResInt<SchedWrite SchedRW,
                          list<ProcResourceKind> ExePorts, int Lat = 1,
                          list<int> Res = [], int UOps = 1> {
  defm : __zn3WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
}

multiclass Zn3WriteResXMM<SchedWrite SchedRW,
                          list<ProcResourceKind> ExePorts, int Lat = 1,
                          list<int> Res = [], int UOps = 1> {
  defm : __zn3WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
}

multiclass Zn3WriteResYMM<SchedWrite SchedRW,
                          list<ProcResourceKind> ExePorts, int Lat = 1,
                          list<int> Res = [], int UOps = 1> {
  defm : __zn3WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
}

// For classes with folded loads.
// Each wrapper forwards to __zn3WriteResPair and fixes the load latency and
// the address-generation resource used by the folded form:
//   * Int pair:      Znver3Model.LoadLatency,    Zn3AGU012
//   * XMM/YMM pairs: Znver3Model.VecLoadLatency, Zn3FPLd01
// LoadUOps (default 0) and LoadRes (default 1) are passed through unchanged.
multiclass Zn3WriteResIntPair<X86FoldableSchedWrite SchedRW,
                              list<ProcResourceKind> ExePorts, int Lat = 1,
                              list<int> Res = [], int UOps = 1,
                              int LoadUOps = 0, int LoadRes = 1> {
  defm : __zn3WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                           Znver3Model.LoadLatency,
                           LoadUOps, Zn3AGU012, LoadRes>;
}

multiclass Zn3WriteResXMMPair<X86FoldableSchedWrite SchedRW,
                              list<ProcResourceKind> ExePorts, int Lat = 1,
                              list<int> Res = [], int UOps = 1,
                              int LoadUOps = 0, int LoadRes = 1> {
  defm : __zn3WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                           Znver3Model.VecLoadLatency,
                           LoadUOps, Zn3FPLd01, LoadRes>;
}

multiclass Zn3WriteResYMMPair<X86FoldableSchedWrite SchedRW,
                              list<ProcResourceKind> ExePorts, int Lat = 1,
                              list<int> Res = [], int UOps = 1,
                              int LoadUOps = 0, int LoadRes = 1> {
  defm : __zn3WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                           Znver3Model.VecLoadLatency,
                           LoadUOps, Zn3FPLd01, LoadRes>;
}


//===----------------------------------------------------------------------===//
// Here be dragons.
//===----------------------------------------------------------------------===//

// Reads after a folded integer load are advanced by the integer load latency.
def : ReadAdvance<ReadAfterLd, Znver3Model.LoadLatency>;

// All three vector read-after-load variants use the same FP load latency.
def : ReadAdvance<ReadAfterVecLd, Znver3Model.VecLoadLatency>;
def : ReadAdvance<ReadAfterVecXLd, Znver3Model.VecLoadLatency>;
def : ReadAdvance<ReadAfterVecYLd, Znver3Model.VecLoadLatency>;

// AMD SOG 19h, 2.11 Floating-Point Unit
// There is 1 cycle of added latency for a result to cross
// from F to I or I to F domain.
// Expressed as a negative advance of one cycle.
def : ReadAdvance<ReadInt2Fpu, -1>;

// Instructions with both a load and a store folded are modeled as a folded
// load + WriteRMW.
// WriteRMW itself contributes store resources and latency but 0 micro-ops.
defm : Zn3WriteResInt<WriteRMW, [Zn3AGU012, Zn3Store], Znver3Model.StoreLatency, [1, 1], 0>;

// Loads, stores, and moves, not folded with other operations.
// A plain load is modeled one cycle slower than Znver3Model.LoadLatency.
defm : Zn3WriteResInt<WriteLoad, [Zn3AGU012, Zn3Load], !add(Znver3Model.LoadLatency, 1), [1, 1], 1>;

// Model the effect of clobbering the read-write mask operand of the GATHER operation.
// Does not cost anything by itself, only has latency, matching that of the WriteLoad.
defm : Zn3WriteResInt<WriteVecMaskedGatherWriteback, [], !add(Znver3Model.LoadLatency, 1), [], 0>;

// Scheduling override for the 8/16-bit load forms listed in the InstRW below:
// latency is LoadLatency + 1, and the AGU group is occupied for 3 cycles
// (the load resource for 1).
def Zn3WriteMOVSlow : SchedWriteRes<[Zn3AGU012, Zn3Load]> {
  let Latency = !add(Znver3Model.LoadLatency, 1);
  let ResourceCycles = [3, 1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteMOVSlow], (instrs MOV8rm, MOV8rm_NOREX, MOV16rm, MOVSX16rm16, MOVSX16rm32, MOVZX16rm16, MOVSX16rm8, MOVZX16rm8)>;

// Plain and non-temporal stores share the same description: one AGU cycle,
// two store-resource cycles, StoreLatency, one micro-op.
defm : Zn3WriteResInt<WriteStore, [Zn3AGU012, Zn3Store], Znver3Model.StoreLatency, [1, 2], 1>;
defm : Zn3WriteResInt<WriteStoreNT, [Zn3AGU012, Zn3Store], Znver3Model.StoreLatency, [1, 2], 1>;
// Register move: latency 1, four cycles on the ALU group, one micro-op.
defm : Zn3WriteResInt<WriteMove, [Zn3ALU0123], 1, [4], 1>;

// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;

// Scheduling override for MOVBE16rm: load resources for one cycle each plus
// four cycles on the ALU group; latency equals LoadLatency; one micro-op.
def Zn3WriteMOVBE16rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> {
  let Latency = Znver3Model.LoadLatency;
  let ResourceCycles = [1, 1, 4];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteMOVBE16rm], (instrs MOVBE16rm)>;

// Scheduling override for the MOVBE store forms (16/32/64-bit): four cycles on
// the ALU group plus one cycle each on AGU and store; latency equals
// StoreLatency; two micro-ops.
def Zn3WriteMOVBEmr : SchedWriteRes<[Zn3ALU0123, Zn3AGU012, Zn3Store]> {
  let Latency = Znver3Model.StoreLatency;
  let ResourceCycles = [4, 1, 1];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteMOVBEmr], (instrs MOVBE16mr, MOVBE32mr, MOVBE64mr)>;

// Arithmetic.
// Simple integer ALU op: any of the four ALUs, latency 1, one cycle, one
// micro-op. The Pair helper also emits the folded-load form.
defm : Zn3WriteResIntPair<WriteALU, [Zn3ALU0123], 1, [1], 1>; // Simple integer ALU op.

// Scheduling override for the accumulator-immediate forms of ADD/AND/OR/SUB/XOR
// listed below: same latency as WriteALU (1) but four cycles on the ALU group.
def Zn3WriteALUSlow : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 1;
  let ResourceCycles = [4];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteALUSlow], (instrs ADD8i8, ADD16i16, ADD32i32, ADD64i32,
                                        AND8i8, AND16i16, AND32i32, AND64i32,
                                         OR8i8,  OR16i16,  OR32i32,  OR64i32,
                                        SUB8i8, SUB16i16, SUB32i32, SUB64i32,
                                        XOR8i8, XOR16i16, XOR32i32, XOR64i32)>;

// Scheduling override for the register-to-register MOVSX16/MOVZX16 forms
// listed below: latency 1, four cycles on the ALU group, one micro-op.
def Zn3WriteMoveExtend : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 1;
  let ResourceCycles = [4];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteMoveExtend], (instrs MOVSX16rr16, MOVSX16rr32, MOVZX16rr16, MOVSX16rr8, MOVZX16rr8)>;

// Scheduling override for materializing a 32-bit immediate into a register
// (MOV32ri, MOV32ri_alt, MOV64ri32): latency 1, two cycles on the ALU group,
// one micro-op.
def Zn3WriteMaterialize32bitImm: SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 1;
  let ResourceCycles = [2];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteMaterialize32bitImm], (instrs MOV32ri, MOV32ri_alt, MOV64ri32)>;

// Register forms of PDEP/PEXT: a single micro-op on Zn3ALU1 only, with a
// 3-cycle latency and 1 cycle of resource occupancy.
def Zn3WritePDEP_PEXT : SchedWriteRes<[Zn3ALU1]> {
  let Latency = 3;
  let ResourceCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WritePDEP_PEXT], (instrs PDEP32rr, PDEP64rr,
                                          PEXT32rr, PEXT64rr)>;

defm : Zn3WriteResIntPair<WriteADC, [Zn3ALU0123], 1, [4], 1>; // Integer ALU + flags op.

// Override for 8-bit ADC/SBB with a memory destination.
// Resources, in order: address generation (1 cycle), load (1 cycle),
// ALU group (7 cycles), store (1 cycle). Modeled as one micro-op with a
// latency of 1.
def Zn3WriteADC8mr_SBB8mr : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123, Zn3Store]> {
  let Latency = 1;
  let ResourceCycles = [1, 1, 7, 1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteADC8mr_SBB8mr], (instrs ADC8mr, SBB8mr)>;

// This is for simple LEAs with one or two input operands.
defm : Zn3WriteResInt<WriteLEA, [Zn3AGU012], 1, [1], 1>;     // LEA instructions can't fold loads.

// This write is used for slow LEA instructions: two micro-ops with a 2-cycle
// latency, charged to the ALU group (Zn3ALU0123) rather than the AGU group
// used by WriteLEA above.
def Zn3Write3OpsLEA : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 2;
  let ResourceCycles = [1];
  let NumMicroOps = 2;
}

// On Znver3, a slow LEA is either a 3Ops LEA (base, index, offset),
// or an LEA with a `Scale` value different from 1.
// The predicate is true if either alternative below matches.
def Zn3SlowLEAPredicate : MCSchedPredicate<
  CheckAny<[
    // A 3-operand LEA (base, index, offset).
    IsThreeOperandsLEAFn,
    // An LEA with a "Scale" different from 1: operand 2 must be an immediate,
    // and that immediate must not be equal to 1.
    CheckAll<[
      CheckIsImmOperand<2>,
      CheckNot<CheckImmOperand<2, 1>>
    ]>
  ]>
>;

// Variant write for 32/64-bit LEA. Variants are listed in order: LEAs that
// match Zn3SlowLEAPredicate use Zn3Write3OpsLEA; NoSchedPred is the fallback
// and maps everything else to the plain WriteLEA.
def Zn3WriteLEA : SchedWriteVariant<[
    SchedVar<Zn3SlowLEAPredicate, [Zn3Write3OpsLEA]>,
    SchedVar<NoSchedPred,         [WriteLEA]>
]>;

def : InstRW<[Zn3WriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;

// 16-bit LEA is always bound to this slow write (no predicate/variant):
// two micro-ops, Zn3ALU0123 charged for 4 cycles.
def Zn3SlowLEA16r : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 2; // FIXME: not from llvm-exegesis
  let ResourceCycles = [4];
  let NumMicroOps = 2;
}

def : InstRW<[Zn3SlowLEA16r], (instrs LEA16r)>;

// Integer multiplication
// All multiplies are charged to Zn3Multiplier. The positional arguments appear
// to be (write, resources, latency, resource cycles, micro-ops[, LoadUOps]);
// the multiclasses themselves are defined outside this excerpt.
defm : Zn3WriteResIntPair<WriteIMul8, [Zn3Multiplier], 3, [3], 1>; // Integer 8-bit multiplication.
defm : Zn3WriteResIntPair<WriteIMul16, [Zn3Multiplier], 3, [3], 3, /*LoadUOps=*/1>; // Integer 16-bit multiplication.
defm : Zn3WriteResIntPair<WriteIMul16Imm, [Zn3Multiplier], 4, [4], 2>; // Integer 16-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul16Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 16-bit multiplication by register.
defm : Zn3WriteResIntPair<WriteIMul32, [Zn3Multiplier], 3, [3], 2>;    // Integer 32-bit multiplication.
defm : Zn3WriteResIntPair<WriteMULX32, [Zn3Multiplier], 3, [1], 2>;    // Integer 32-bit Unsigned Multiply Without Affecting Flags.
defm : Zn3WriteResIntPair<WriteIMul32Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul32Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by register.
defm : Zn3WriteResIntPair<WriteIMul64, [Zn3Multiplier], 3, [3], 2>;    // Integer 64-bit multiplication.
defm : Zn3WriteResIntPair<WriteMULX64, [Zn3Multiplier], 3, [1], 2>;    // Integer 64-bit Unsigned Multiply Without Affecting Flags.
defm : Zn3WriteResIntPair<WriteIMul64Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul64Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by register.
// The two "high part" writes consume no resources and no micro-ops; they only
// contribute latency (4 cycles, plus the load latency for the Ld variant).
defm : Zn3WriteResInt<WriteIMulHLd, [], !add(4, Znver3Model.LoadLatency), [], 0>;  // Integer multiplication, high part (load variant).
defm : Zn3WriteResInt<WriteIMulH, [], 4, [], 0>;  // Integer multiplication, high part.

// Byte swaps: one micro-op, 1-cycle latency, 1 cycle on Zn3ALU0123.
defm : Zn3WriteResInt<WriteBSWAP32, [Zn3ALU0123], 1, [1], 1>; // Byte Order (Endianness) 32-bit Swap.
defm : Zn3WriteResInt<WriteBSWAP64, [Zn3ALU0123], 1, [1], 1>; // Byte Order (Endianness) 64-bit Swap.

defm : Zn3WriteResIntPair<WriteCMPXCHG, [Zn3ALU0123], 3, [12], 5>; // Compare and set, compare and swap.

// Override for the 8-bit register form: same latency (3) and ALU occupancy
// (12 cycles) as WriteCMPXCHG, but 3 micro-ops instead of 5.
def Zn3WriteCMPXCHG8rr : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 3;
  let ResourceCycles = [12];
  let NumMicroOps = 3;
}
def : InstRW<[Zn3WriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;

defm : Zn3WriteResInt<WriteCMPXCHGRMW, [Zn3ALU0123], 3, [12], 6>;     // Compare and set, compare and swap.

// 8-bit CMPXCHG with a memory operand, and its LOCK-prefixed form.
// Derived from Zn3WriteCMPXCHG8rr: latency is that write's latency plus the
// load latency, and the micro-op count is that write's count plus 2.
def Zn3WriteCMPXCHG8rm_LCMPXCHG8 : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteCMPXCHG8rr.Latency);
  let ResourceCycles = [1, 1, 12];
  let NumMicroOps = !add(Zn3WriteCMPXCHG8rr.NumMicroOps, 2);
}
def : InstRW<[Zn3WriteCMPXCHG8rm_LCMPXCHG8], (instrs CMPXCHG8rm, LCMPXCHG8)>;

// CMPXCHG8B: 19 micro-ops, Zn3ALU0123 charged for 24 cycles.
// The latency value is an estimate (see FIXME).
def Zn3WriteCMPXCHG8B : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 3; // FIXME: not from llvm-exegesis
  let ResourceCycles = [24];
  let NumMicroOps = 19;
}
def : InstRW<[Zn3WriteCMPXCHG8B], (instrs CMPXCHG8B)>;

// CMPXCHG16B and its LOCK-prefixed form: 28 micro-ops, Zn3ALU0123 charged for
// 59 cycles. The latency value is an estimate (see FIXME).
def Zn3WriteCMPXCHG16B_LCMPXCHG16B : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 4; // FIXME: not from llvm-exegesis
  let ResourceCycles = [59];
  let NumMicroOps = 28;
}
def : InstRW<[Zn3WriteCMPXCHG16B_LCMPXCHG16B], (instrs CMPXCHG16B, LCMPXCHG16B)>;

// 8-bit and 16-bit register XCHG forms: two micro-ops, 1-cycle latency,
// Zn3ALU0123 charged for 2 cycles.
// NOTE(review): the name suggests these widths cannot be handled by register
// renaming, unlike wider XCHG — confirm against the rest of the model.
def Zn3WriteWriteXCHGUnrenameable : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 1;
  let ResourceCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteWriteXCHGUnrenameable], (instrs XCHG8rr, XCHG16rr, XCHG16ar)>;

// 8-bit and 16-bit XCHG with a memory operand: 5 micro-ops; latency is the
// load latency plus 3 (an estimate, see FIXME).
def Zn3WriteXCHG8rm_XCHG16rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, 3); // FIXME: not from llvm-exegesis
  let ResourceCycles = [1, 1, 2];
  let NumMicroOps = 5;
}
def : InstRW<[Zn3WriteXCHG8rm_XCHG16rm], (instrs XCHG8rm, XCHG16rm)>;

// 32-bit and 64-bit XCHG with a memory operand: 2 micro-ops; latency is the
// load latency plus 2 (an estimate, see FIXME).
def Zn3WriteXCHG32rm_XCHG64rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, 2); // FIXME: not from llvm-exegesis
  let ResourceCycles = [1, 1, 2];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteXCHG32rm_XCHG64rm], (instrs XCHG32rm, XCHG64rm)>;

// Integer division.
// Unsigned (WriteDiv*) and signed (WriteIDiv*) division use identical numbers
// per operand width. Each entry charges Zn3Divider for as many cycles as its
// latency.
// FIXME: uops for 8-bit division measure as 2; for the other widths it's a guess.
// FIXME: latency for 8-bit division measures as 10; for the other widths it's a guess.
defm : Zn3WriteResIntPair<WriteDiv8, [Zn3Divider], 10, [10], 2>;
defm : Zn3WriteResIntPair<WriteDiv16, [Zn3Divider], 11, [11], 2>;
defm : Zn3WriteResIntPair<WriteDiv32, [Zn3Divider], 13, [13], 2>;
defm : Zn3WriteResIntPair<WriteDiv64, [Zn3Divider], 17, [17], 2>;
defm : Zn3WriteResIntPair<WriteIDiv8, [Zn3Divider], 10, [10], 2>;
defm : Zn3WriteResIntPair<WriteIDiv16, [Zn3Divider], 11, [11], 2>;
defm : Zn3WriteResIntPair<WriteIDiv32, [Zn3Divider], 13, [13], 2>;
defm : Zn3WriteResIntPair<WriteIDiv64, [Zn3Divider], 17, [17], 2>;

// Bit scans run on Zn3ALU1 only; 6 micro-ops each, with LoadUOps set to 2.
defm : Zn3WriteResIntPair<WriteBSF, [Zn3ALU1], 3, [3], 6, /*LoadUOps=*/2>; // Bit scan forward.
defm : Zn3WriteResIntPair<WriteBSR, [Zn3ALU1], 4, [4], 6, /*LoadUOps=*/2>; // Bit scan reverse.

defm : Zn3WriteResIntPair<WritePOPCNT, [Zn3ALU0123], 1, [1], 1>; // Bit population count.

// Override for the 16-bit register form: same latency and micro-op count as
// WritePOPCNT, but Zn3ALU0123 is charged 4 cycles instead of 1.
def Zn3WritePOPCNT16rr : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 1;
  let ResourceCycles = [4];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WritePOPCNT16rr], (instrs POPCNT16rr)>;

defm : Zn3WriteResIntPair<WriteLZCNT, [Zn3ALU0123], 1, [1], 1>; // Leading zero count.

// Override for the 16-bit register form: same latency and micro-op count as
// WriteLZCNT, but Zn3ALU0123 is charged 4 cycles instead of 1.
def Zn3WriteLZCNT16rr : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 1;
  let ResourceCycles = [4];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteLZCNT16rr], (instrs LZCNT16rr)>;

defm : Zn3WriteResIntPair<WriteTZCNT, [Zn3ALU12], 2, [1], 2>; // Trailing zero count.

// Override for the 16-bit register form: same latency (2) and micro-op count
// (2) as WriteTZCNT, with 4 cycles of resource occupancy.
// NOTE(review): this override uses Zn3ALU0123 while the generic WriteTZCNT
// above uses Zn3ALU12 — confirm the wider resource group is intentional.
def Zn3WriteTZCNT16rr : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 2;
  let ResourceCycles = [4];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteTZCNT16rr], (instrs TZCNT16rr)>;

// Condition-code consumers. CMOV and SETCC use the Zn3ALU03 group;
// LAHF/SAHF uses Zn3ALU3 only.
defm : Zn3WriteResIntPair<WriteCMOV, [Zn3ALU03], 1, [1], 1>; // Conditional move.
defm : Zn3WriteResInt<WriteFCMOV, [Zn3ALU0123], 7, [28], 7>; // FIXME: not from llvm-exegesis // X87 conditional move.
defm : Zn3WriteResInt<WriteSETCC, [Zn3ALU03], 1, [2], 1>; // Set register based on condition code.
defm : Zn3WriteResInt<WriteSETCCStore, [Zn3ALU03, Zn3AGU012, Zn3Store], 2, [2, 1, 1], 2>; // FIXME: latency not from llvm-exegesis
defm : Zn3WriteResInt<WriteLAHFSAHF, [Zn3ALU3], 1, [1], 1>; // Load/Store flags in AH.

// Bit test. The load variants add the load latency to the register latency
// and differ only in micro-op count (2 for ImmLd, 7 for RegLd).
defm : Zn3WriteResInt<WriteBitTest, [Zn3ALU12], 1, [1], 1>; // Bit Test
defm : Zn3WriteResInt<WriteBitTestImmLd, [Zn3AGU012, Zn3Load, Zn3ALU12], !add(Znver3Model.LoadLatency, 1), [1, 1, 1], 2>;
defm : Zn3WriteResInt<WriteBitTestRegLd, [Zn3AGU012, Zn3Load, Zn3ALU12], !add(Znver3Model.LoadLatency, 1), [1, 1, 1], 7>;

// Bit test-and-modify. The load variants add the load latency to the register
// latency and differ only in micro-op count (4 for ImmLd, 9 for RegLd).
defm : Zn3WriteResInt<WriteBitTestSet, [Zn3ALU12], 2, [2], 2>; // Bit Test + Set
defm : Zn3WriteResInt<WriteBitTestSetImmLd, [Zn3AGU012, Zn3Load, Zn3ALU12], !add(Znver3Model.LoadLatency, 2), [1, 1, 1], 4>;
defm : Zn3WriteResInt<WriteBitTestSetRegLd, [Zn3AGU012, Zn3Load, Zn3ALU12], !add(Znver3Model.LoadLatency, 2), [1, 1, 1], 9>;

// Integer shifts and rotates.
defm : Zn3WriteResIntPair<WriteShift, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;
defm : Zn3WriteResIntPair<WriteShiftCL, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;
defm : Zn3WriteResIntPair<WriteRotate, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;

// RCL/RCR by one, register operand: one micro-op, 1-cycle latency,
// Zn3ALU12 charged for 2 cycles.
def Zn3WriteRotateR1 : SchedWriteRes<[Zn3ALU12]> {
  let Latency = 1;
  let ResourceCycles = [2];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteRotateR1], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
                                         RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;

// RCL/RCR by one, memory operand. Derived from Zn3WriteRotateR1: its latency
// plus the load latency, and its micro-op count plus 1.
def Zn3WriteRotateM1 : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateR1.Latency);
  let ResourceCycles = [1, 1, 2];
  let NumMicroOps = !add(Zn3WriteRotateR1.NumMicroOps, 1);
}
def : InstRW<[Zn3WriteRotateM1], (instrs RCL8m1, RCL16m1, RCL32m1, RCL64m1,
                                         RCR8m1, RCR16m1, RCR32m1, RCR64m1)>;

// RCR by immediate, register operand: 7 micro-ops, 3-cycle latency,
// Zn3ALU12 charged for 6 cycles.
def Zn3WriteRotateRightRI : SchedWriteRes<[Zn3ALU12]> {
  let Latency = 3;
  let ResourceCycles = [6];
  let NumMicroOps = 7;
}
def : InstRW<[Zn3WriteRotateRightRI], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;

// RCR by immediate, memory operand. Derived from Zn3WriteRotateRightRI: its
// latency plus the load latency, and its micro-op count plus 3. Zn3ALU12 is
// charged for 8 cycles here.
def Zn3WriteRotateRightMI : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateRightRI.Latency);
  let ResourceCycles = [1, 1, 8];
  let NumMicroOps = !add(Zn3WriteRotateRightRI.NumMicroOps, 3);
}
def : InstRW<[Zn3WriteRotateRightMI], (instrs RCR8mi, RCR16mi, RCR32mi, RCR64mi)>;

// RCL by immediate, register operand: 9 micro-ops, 4-cycle latency,
// Zn3ALU12 charged for 8 cycles.
def Zn3WriteRotateLeftRI : SchedWriteRes<[Zn3ALU12]> {
  let Latency = 4;
  let ResourceCycles = [8];
  let NumMicroOps = 9;
}
def : InstRW<[Zn3WriteRotateLeftRI], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;

// RCL by immediate, memory operand. Derived from Zn3WriteRotateLeftRI: its
// latency plus the load latency, and its micro-op count plus 2.
def Zn3WriteRotateLeftMI : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateLeftRI.Latency);
  let ResourceCycles = [1, 1, 8];
  let NumMicroOps = !add(Zn3WriteRotateLeftRI.NumMicroOps, 2);
}
def : InstRW<[Zn3WriteRotateLeftMI], (instrs RCL8mi, RCL16mi, RCL32mi, RCL64mi)>;

defm : Zn3WriteResIntPair<WriteRotateCL, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;

// RCR by CL, register operand: 7 micro-ops, 3-cycle latency, Zn3ALU12
// charged for 6 cycles (the same numbers as Zn3WriteRotateRightRI).
def Zn3WriteRotateRightRCL : SchedWriteRes<[Zn3ALU12]> {
  let Latency = 3;
  let ResourceCycles = [6];
  let NumMicroOps = 7;
}
def : InstRW<[Zn3WriteRotateRightRCL], (instrs RCR8rCL, RCR16rCL, RCR32rCL, RCR64rCL)>;

// RCR by CL, memory operand. Derived from Zn3WriteRotateRightRCL: its latency
// plus the load latency, and its micro-op count plus 2.
def Zn3WriteRotateRightMCL : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateRightRCL.Latency);
  let ResourceCycles = [1, 1, 8];
  let NumMicroOps = !add(Zn3WriteRotateRightRCL.NumMicroOps, 2);
}
def : InstRW<[Zn3WriteRotateRightMCL], (instrs RCR8mCL, RCR16mCL, RCR32mCL, RCR64mCL)>;

// RCL by CL, register operand: 9 micro-ops, 4-cycle latency, Zn3ALU12
// charged for 8 cycles (the same numbers as Zn3WriteRotateLeftRI).
def Zn3WriteRotateLeftRCL : SchedWriteRes<[Zn3ALU12]> {
  let Latency = 4;
  let ResourceCycles = [8];
  let NumMicroOps = 9;
}
def : InstRW<[Zn3WriteRotateLeftRCL], (instrs RCL8rCL, RCL16rCL, RCL32rCL, RCL64rCL)>;

// RCL by CL, memory operand. Derived from Zn3WriteRotateLeftRCL: its latency
// plus the load latency, and its micro-op count plus 2.
def Zn3WriteRotateLeftMCL : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateLeftRCL.Latency);
  let ResourceCycles = [1, 1, 8];
  let NumMicroOps = !add(Zn3WriteRotateLeftRCL.NumMicroOps, 2);
}
def : InstRW<[Zn3WriteRotateLeftMCL], (instrs RCL8mCL, RCL16mCL, RCL32mCL, RCL64mCL)>;

// Double shift instructions.
// Register forms use Zn3ALU12 alone; memory forms add address generation and
// a load, and add the load latency to the 2-cycle register latency.
defm : Zn3WriteResInt<WriteSHDrri, [Zn3ALU12], 2, [3], 4>;
defm : Zn3WriteResInt<WriteSHDrrcl, [Zn3ALU12], 2, [3], 5>;
defm : Zn3WriteResInt<WriteSHDmri, [Zn3AGU012, Zn3Load, Zn3ALU12], !add(Znver3Model.LoadLatency, 2), [1, 1, 4], 6>;
defm : Zn3WriteResInt<WriteSHDmrcl, [Zn3AGU012, Zn3Load, Zn3ALU12], !add(Znver3Model.LoadLatency, 2), [1, 1, 4], 6>;

// BMI1 BEXTR/BLS, BMI2 BZHI
// BEXTR and BZHI use Zn3ALU12; BLS uses the full Zn3ALU0123 group with two
// micro-ops and a 2-cycle latency.
defm : Zn3WriteResIntPair<WriteBEXTR, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;
defm : Zn3WriteResIntPair<WriteBLS, [Zn3ALU0123], 2, [2], 2, /*LoadUOps=*/1>;
defm : Zn3WriteResIntPair<WriteBZHI, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;

// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely, so the entry has zero
// latency and zero resource cycles, but still counts as one micro-op.
defm : Zn3WriteResInt<WriteZero, [Zn3ALU0123], 0, [0], 1>;

// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
// NOTE(review): the latency argument below is nevertheless 1, not 0 — confirm
// whether that is intentional.
defm : Zn3WriteResIntPair<WriteJump, [Zn3BRU01], 1, [1], 1>; // FIXME: not from llvm-exegesis

// Floating point. This covers both scalar and vector operations.
// x87 constant loads: load latency plus 4 (FLD0) or 7 (FLD1/FLDC).
defm : Zn3WriteResInt<WriteFLD0, [Zn3FPLd01, Zn3Load, Zn3FPP1], !add(Znver3Model.LoadLatency, 4), [1, 1, 1], 1>;
defm : Zn3WriteResInt<WriteFLD1, [Zn3FPLd01, Zn3Load, Zn3FPP1], !add(Znver3Model.LoadLatency, 7), [1, 1, 1], 1>;
defm : Zn3WriteResInt<WriteFLDC, [Zn3FPLd01, Zn3Load, Zn3FPP1], !add(Znver3Model.LoadLatency, 7), [1, 1, 1], 1>;
// FP loads: vector load latency plus 1, one micro-op.
defm : Zn3WriteResXMM<WriteFLoad, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResXMM<WriteFLoadX, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResYMM<WriteFLoadY, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResXMM<WriteFMaskedLoad, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResYMM<WriteFMaskedLoadY, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
// FP store: the model's store latency, one micro-op.
defm : Zn3WriteResXMM<WriteFStore, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;

// Store forms of (V)MOVHPD/(V)MOVHPS: two micro-ops, 1 cycle each on Zn3FPSt
// and Zn3Store. The latency value is an estimate (see FIXME).
// NOTE(review): despite "MMX" in the name, the instructions bound below are
// the MOVHPD/MOVHPS store forms — confirm the name is just historical.
def Zn3WriteWriteFStoreMMX : SchedWriteRes<[Zn3FPSt, Zn3Store]> {
  let Latency = 2; // FIXME: not from llvm-exegesis
  let ResourceCycles = [1, 1];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteWriteFStoreMMX], (instrs MOVHPDmr,  MOVHPSmr,
                                               VMOVHPDmr, VMOVHPSmr)>;

// Remaining FP stores (XMM/YMM, regular and NT variants): all use the model's
// store latency, one micro-op, 1 cycle each on Zn3FPSt and Zn3Store.
defm : Zn3WriteResXMM<WriteFStoreX, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResYMM<WriteFStoreY, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResXMM<WriteFStoreNT, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResXMM<WriteFStoreNTX, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResYMM<WriteFStoreNTY, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;

// FP masked stores. These keep the model's store latency but are far more
// expensive than plain stores: Zn3FPSt is charged 4-12 cycles and the
// micro-op counts range from 10 to 42.
defm : Zn3WriteResXMM<WriteFMaskedStore32, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [6, 1], 18>;
defm : Zn3WriteResXMM<WriteFMaskedStore64, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [4, 1], 10>;
defm : Zn3WriteResYMM<WriteFMaskedStore32Y, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [12, 1], 42>;
defm : Zn3WriteResYMM<WriteFMaskedStore64Y, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [6, 1], 18>;

defm : Zn3WriteResXMMPair<WriteFAdd, [Zn3FPFAdd01], 3, [1], 1>;  // Floating point add/sub.

// x87 add/sub/reverse-sub/mul with a 16- or 32-bit integer memory operand:
// two micro-ops, Zn3FPU0123 charged for 24 cycles. Latency is the load
// latency plus 1 (an estimate, see FIXME).
def Zn3WriteX87Arith : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, 1); // FIXME: not from llvm-exegesis
  let ResourceCycles = [1, 1, 24];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteX87Arith], (instrs ADD_FI16m, ADD_FI32m,
                                         SUB_FI16m, SUB_FI32m,
                                         SUBR_FI16m, SUBR_FI32m,
                                         MUL_FI16m, MUL_FI32m)>;

// x87 div/reverse-div with a 16- or 32-bit integer memory operand: two
// micro-ops, Zn3FPU0123 charged for 62 cycles. Latency is the load latency
// plus 1 (an estimate, see FIXME).
def Zn3WriteX87Div : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, 1); // FIXME: not from llvm-exegesis
  let ResourceCycles = [1, 1, 62];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteX87Div], (instrs DIV_FI16m, DIV_FI32m,
                                       DIVR_FI16m, DIVR_FI32m)>;

// FP add/sub and compares. ZMM variants are marked unsupported in this model.
defm : Zn3WriteResXMMPair<WriteFAddX, [Zn3FPFAdd01], 3, [1], 1>; // Floating point add/sub (XMM).
defm : Zn3WriteResYMMPair<WriteFAddY, [Zn3FPFAdd01], 3, [1], 1>; // Floating point add/sub (YMM).
defm : X86WriteResPairUnsupported<WriteFAddZ>; // Floating point add/sub (ZMM).
defm : Zn3WriteResXMMPair<WriteFAdd64, [Zn3FPFAdd01], 3, [1], 1>;  // Floating point double add/sub.
defm : Zn3WriteResXMMPair<WriteFAdd64X, [Zn3FPFAdd01], 3, [1], 1>; // Floating point double add/sub (XMM).
defm : Zn3WriteResYMMPair<WriteFAdd64Y, [Zn3FPFAdd01], 3, [1], 1>; // Floating point double add/sub (YMM).
defm : X86WriteResPairUnsupported<WriteFAdd64Z>; // Floating point double add/sub (ZMM).
defm : Zn3WriteResXMMPair<WriteFCmp, [Zn3FPFMul01], 1, [1], 1>;  // Floating point compare.
defm : Zn3WriteResXMMPair<WriteFCmpX, [Zn3FPFMul01], 1, [1], 1>; // Floating point compare (XMM).
defm : Zn3WriteResYMMPair<WriteFCmpY, [Zn3FPFMul01], 1, [1], 1>; // Floating point compare (YMM).
defm : X86WriteResPairUnsupported<WriteFCmpZ>; // Floating point compare (ZMM).
defm : Zn3WriteResXMMPair<WriteFCmp64, [Zn3FPFMul01], 1, [1], 1>;  // Floating point double compare.
defm : Zn3WriteResXMMPair<WriteFCmp64X, [Zn3FPFMul01], 1, [1], 1>; // Floating point double compare (XMM).
defm : Zn3WriteResYMMPair<WriteFCmp64Y, [Zn3FPFMul01], 1, [1], 1>; // Floating point double compare (YMM).
defm : X86WriteResPairUnsupported<WriteFCmp64Z>; // Floating point double compare (ZMM).
defm : Zn3WriteResXMMPair<WriteFCom, [Zn3FPFMul01], 3, [2], 1>; // FIXME: latency not from llvm-exegesis  // Floating point compare to flags (X87).
defm : Zn3WriteResXMMPair<WriteFComX, [Zn3FPFMul01], 4, [2], 2>;  // FIXME: latency not from llvm-exegesis // Floating point compare to flags (SSE).
// FP multiply and divide. ZMM variants are marked unsupported in this model.
defm : Zn3WriteResXMMPair<WriteFMul, [Zn3FPFMul01], 3, [1], 1>;  // Floating point multiplication.
defm : Zn3WriteResXMMPair<WriteFMulX, [Zn3FPFMul01], 3, [1], 1>; // Floating point multiplication (XMM).
defm : Zn3WriteResYMMPair<WriteFMulY, [Zn3FPFMul01], 3, [1], 1>; // Floating point multiplication (YMM).
defm : X86WriteResPairUnsupported<WriteFMulZ>; // Floating point multiplication (ZMM).
defm : Zn3WriteResXMMPair<WriteFMul64, [Zn3FPFMul01], 3, [1], 1>;  // Floating point double multiplication.
defm : Zn3WriteResXMMPair<WriteFMul64X, [Zn3FPFMul01], 3, [1], 1>; // Floating point double multiplication (XMM).
defm : Zn3WriteResYMMPair<WriteFMul64Y, [Zn3FPFMul01], 3, [1], 1>; // Floating point double multiplication (YMM).
defm : X86WriteResPairUnsupported<WriteFMul64Z>; // Floating point double multiplication (ZMM).
defm : Zn3WriteResXMMPair<WriteFDiv, [Zn3FPFDiv], 11, [3], 1>;  // Floating point division.
defm : Zn3WriteResXMMPair<WriteFDivX, [Zn3FPFDiv], 11, [3], 1>; // Floating point division (XMM).
defm : Zn3WriteResYMMPair<WriteFDivY, [Zn3FPFDiv], 11, [3], 1>; // Floating point division (YMM).
defm : X86WriteResPairUnsupported<WriteFDivZ>; // Floating point division (ZMM).
defm : Zn3WriteResXMMPair<WriteFDiv64, [Zn3FPFDiv], 13, [5], 1>;  // Floating point double division.
defm : Zn3WriteResXMMPair<WriteFDiv64X, [Zn3FPFDiv], 13, [5], 1>; // Floating point double division (XMM).
defm : Zn3WriteResYMMPair<WriteFDiv64Y, [Zn3FPFDiv], 13, [5], 1>; // Floating point double division (YMM).
defm : X86WriteResPairUnsupported<WriteFDiv64Z>; // Floating point double division (ZMM).
// FP square root and reciprocal estimates. Square roots and reciprocal
// square-root estimates use Zn3FPFDiv; reciprocal estimates use Zn3FPFMul01.
// ZMM variants are marked unsupported in this model.
defm : Zn3WriteResXMMPair<WriteFSqrt, [Zn3FPFDiv], 15, [5], 1>;   // Floating point square root.
defm : Zn3WriteResXMMPair<WriteFSqrtX, [Zn3FPFDiv], 15, [5], 1>;  // Floating point square root (XMM).
defm : Zn3WriteResYMMPair<WriteFSqrtY, [Zn3FPFDiv], 15, [5], 1>;  // Floating point square root (YMM).
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;  // Floating point square root (ZMM).
defm : Zn3WriteResXMMPair<WriteFSqrt64, [Zn3FPFDiv], 21, [9], 1>;  // Floating point double square root.
defm : Zn3WriteResXMMPair<WriteFSqrt64X, [Zn3FPFDiv], 21, [9], 1>; // Floating point double square root (XMM).
defm : Zn3WriteResYMMPair<WriteFSqrt64Y, [Zn3FPFDiv], 21, [9], 1>; // Floating point double square root (YMM).
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>; // Floating point double square root (ZMM).
defm : Zn3WriteResXMMPair<WriteFSqrt80, [Zn3FPFDiv], 22, [23], 1>; // FIXME: latency not from llvm-exegesis  // Floating point long double square root.
defm : Zn3WriteResXMMPair<WriteFRcp, [Zn3FPFMul01], 3, [1], 1>;  // Floating point reciprocal estimate.
defm : Zn3WriteResXMMPair<WriteFRcpX, [Zn3FPFMul01], 3, [1], 1>; // Floating point reciprocal estimate (XMM).
defm : Zn3WriteResYMMPair<WriteFRcpY, [Zn3FPFMul01], 3, [1], 1>; // Floating point reciprocal estimate (YMM).
defm : X86WriteResPairUnsupported<WriteFRcpZ>; // Floating point reciprocal estimate (ZMM).
defm : Zn3WriteResXMMPair<WriteFRsqrt, [Zn3FPFDiv], 3, [1], 1>;  // Floating point reciprocal square root estimate.
defm : Zn3WriteResXMMPair<WriteFRsqrtX, [Zn3FPFDiv], 3, [1], 1>; // Floating point reciprocal square root estimate (XMM).
defm : Zn3WriteResYMMPair<WriteFRsqrtY, [Zn3FPFDiv], 3, [1], 1>; // Floating point reciprocal square root estimate (YMM).
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>; // Floating point reciprocal square root estimate (ZMM).
// FMA, dot products, sign manipulation and rounding. ZMM variants are marked
// unsupported in this model.
defm : Zn3WriteResXMMPair<WriteFMA, [Zn3FPFMul01], 4, [1], 1>;  // Fused Multiply Add.
defm : Zn3WriteResXMMPair<WriteFMAX, [Zn3FPFMul01], 4, [1], 1>; // Fused Multiply Add (XMM).
defm : Zn3WriteResYMMPair<WriteFMAY, [Zn3FPFMul01], 4, [1], 1>; // Fused Multiply Add (YMM).
defm : X86WriteResPairUnsupported<WriteFMAZ>; // Fused Multiply Add (ZMM).
defm : Zn3WriteResXMMPair<WriteDPPD, [Zn3FPFMul01], 9, [6], 3, /*LoadUOps=*/2>; // Floating point double dot product.
defm : Zn3WriteResXMMPair<WriteDPPS, [Zn3FPFMul01], 15, [8], 8, /*LoadUOps=*/2>; // Floating point single dot product.
defm : Zn3WriteResYMMPair<WriteDPPSY, [Zn3FPFMul01], 15, [8], 7, /*LoadUOps=*/1>; // Floating point single dot product (YMM).
defm : Zn3WriteResXMMPair<WriteFSign, [Zn3FPFMul01], 1, [2], 1>; // FIXME: latency not from llvm-exegesis  // Floating point fabs/fchs.
defm : Zn3WriteResXMMPair<WriteFRnd, [Zn3FPFCvt01], 3, [1], 1>; // Floating point rounding.
defm : Zn3WriteResYMMPair<WriteFRndY, [Zn3FPFCvt01], 3, [1], 1>; // Floating point rounding (YMM).
defm : X86WriteResPairUnsupported<WriteFRndZ>; // Floating point rounding (ZMM).
// FP logicals, tests, shuffles and blends. ZMM variants are marked
// unsupported in this model.
defm : Zn3WriteResXMMPair<WriteFLogic, [Zn3FPVMisc0123], 1, [1], 1>; // Floating point and/or/xor logicals.
defm : Zn3WriteResYMMPair<WriteFLogicY, [Zn3FPVMisc0123], 1, [1], 1>; // Floating point and/or/xor logicals (YMM).
defm : X86WriteResPairUnsupported<WriteFLogicZ>; // Floating point and/or/xor logicals (ZMM).
defm : Zn3WriteResXMMPair<WriteFTest, [Zn3FPFMisc12], 1, [2], 2>; // FIXME: latency not from llvm-exegesis // Floating point TEST instructions.
defm : Zn3WriteResYMMPair<WriteFTestY, [Zn3FPFMisc12], 1, [2], 2>; // FIXME: latency not from llvm-exegesis // Floating point TEST instructions (YMM).
defm : X86WriteResPairUnsupported<WriteFTestZ>; // Floating point TEST instructions (ZMM).
defm : Zn3WriteResXMMPair<WriteFShuffle, [Zn3FPVShuf01], 1, [1], 1>; // Floating point vector shuffles.
defm : Zn3WriteResYMMPair<WriteFShuffleY, [Zn3FPVShuf01], 1, [1], 1>; // Floating point vector shuffles (YMM).
defm : X86WriteResPairUnsupported<WriteFShuffleZ>; // Floating point vector shuffles (ZMM).
defm : Zn3WriteResXMMPair<WriteFVarShuffle, [Zn3FPVShuf01], 3, [1], 1>; // Floating point vector variable shuffles.
defm : Zn3WriteResYMMPair<WriteFVarShuffleY, [Zn3FPVShuf01], 3, [1], 1>; // Floating point vector variable shuffles (YMM).
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>; // Floating point vector variable shuffles (ZMM).
defm : Zn3WriteResXMMPair<WriteFBlend, [Zn3FPFMul01], 1, [1], 1>; // Floating point vector blends.
defm : Zn3WriteResYMMPair<WriteFBlendY, [Zn3FPFMul01], 1, [1], 1>; // Floating point vector blends (YMM).
defm : X86WriteResPairUnsupported<WriteFBlendZ>; // Floating point vector blends (ZMM).
defm : Zn3WriteResXMMPair<WriteFVarBlend, [Zn3FPFMul01], 1, [1], 1>; // Fp vector variable blends.
defm : Zn3WriteResYMMPair<WriteFVarBlendY, [Zn3FPFMul01], 1, [1], 1>; // Fp vector variable blends (YMM).
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>; // Fp vector variable blends (ZMM).

97073471bf0Spatrick// Horizontal Add/Sub (float and integer)
97173471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteFHAdd, [Zn3FPFAdd0], 6, [2], 4>;
97273471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteFHAddY, [Zn3FPFAdd0], 6, [2], 3, /*LoadUOps=*/1>;
97373471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteFHAddZ>;
97473471bf0Spatrickdefm : Zn3WriteResXMMPair<WritePHAdd, [Zn3FPVAdd0], 2, [2], 3, /*LoadUOps=*/1>;
97573471bf0Spatrickdefm : Zn3WriteResXMMPair<WritePHAddX, [Zn3FPVAdd0], 2, [2], 4>;
97673471bf0Spatrickdefm : Zn3WriteResYMMPair<WritePHAddY, [Zn3FPVAdd0], 2, [2], 3, /*LoadUOps=*/1>;
97773471bf0Spatrickdefm : X86WriteResPairUnsupported<WritePHAddZ>;
97873471bf0Spatrick
97973471bf0Spatrick// Vector integer operations.
98073471bf0Spatrickdefm : Zn3WriteResXMM<WriteVecLoad, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
98173471bf0Spatrickdefm : Zn3WriteResXMM<WriteVecLoadX, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
98273471bf0Spatrickdefm : Zn3WriteResYMM<WriteVecLoadY, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
98373471bf0Spatrickdefm : Zn3WriteResXMM<WriteVecLoadNT, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
98473471bf0Spatrickdefm : Zn3WriteResYMM<WriteVecLoadNTY, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
98573471bf0Spatrickdefm : Zn3WriteResXMM<WriteVecMaskedLoad, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
98673471bf0Spatrickdefm : Zn3WriteResYMM<WriteVecMaskedLoadY, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
98773471bf0Spatrickdefm : Zn3WriteResXMM<WriteVecStore, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
98873471bf0Spatrickdefm : Zn3WriteResXMM<WriteVecStoreX, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
98973471bf0Spatrick
// Scheduling record for the register-to-register forms VEXTRACTF128rr and
// VEXTRACTI128rr: one micro-op, 4-cycle latency, Zn3FPFMisc0 held for one cycle.
// The memory-operand VEXTRACT/VINSERT records that follow derive their latency
// and micro-op counts from this record's fields.
99073471bf0Spatrickdef Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr : SchedWriteRes<[Zn3FPFMisc0]> {
99173471bf0Spatrick  let Latency = 4;
99273471bf0Spatrick  let ResourceCycles = [1];
99373471bf0Spatrick  let NumMicroOps = 1;
99473471bf0Spatrick}
99573471bf0Spatrickdef : InstRW<[Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr], (instrs VEXTRACTF128rr, VEXTRACTI128rr)>;
99673471bf0Spatrick
// Scheduling record for VEXTRACTI128mr / VEXTRACTF128mr. Holds Zn3FPFMisc0,
// Zn3FPSt and Zn3Store for one cycle each and is modeled as one micro-op more
// than the register-to-register record.
// NOTE(review): the latency is computed from Znver3Model.LoadLatency although
// the resources listed are store resources (Zn3FPSt, Zn3Store) - confirm
// whether Znver3Model.StoreLatency was intended.
99773471bf0Spatrickdef Zn3WriteVEXTRACTI128mr : SchedWriteRes<[Zn3FPFMisc0, Zn3FPSt, Zn3Store]> {
99873471bf0Spatrick  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
99973471bf0Spatrick  let ResourceCycles = [1, 1, 1];
100073471bf0Spatrick  let NumMicroOps = !add(Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 1);
100173471bf0Spatrick}
100273471bf0Spatrickdef : InstRW<[Zn3WriteVEXTRACTI128mr], (instrs VEXTRACTI128mr, VEXTRACTF128mr)>;
100373471bf0Spatrick
// Scheduling record for VINSERTF128rm. Holds Zn3AGU012, Zn3Load and
// Zn3FPFMisc0 for one cycle each. Latency is the model's load latency plus the
// latency of the register-form VEXTRACT record; the micro-op count is taken
// from that record unchanged (the `!add(..., 0)`).
// NOTE(review): only VINSERTF128rm is bound here; VINSERTI128rm is presumably
// handled elsewhere - confirm.
100473471bf0Spatrickdef Zn3WriteVINSERTF128rmr : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPFMisc0]> {
100573471bf0Spatrick  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
100673471bf0Spatrick  let ResourceCycles = [1, 1, 1];
100773471bf0Spatrick  let NumMicroOps = !add(Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 0);
100873471bf0Spatrick}
100973471bf0Spatrickdef : InstRW<[Zn3WriteVINSERTF128rmr], (instrs VINSERTF128rm)>;
101073471bf0Spatrick
101173471bf0Spatrickdefm : Zn3WriteResYMM<WriteVecStoreY, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
101273471bf0Spatrickdefm : Zn3WriteResXMM<WriteVecStoreNT, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
101373471bf0Spatrickdefm : Zn3WriteResYMM<WriteVecStoreNTY, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
101473471bf0Spatrickdefm : Zn3WriteResXMM<WriteVecMaskedStore32, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [6, 1], 18>;
101573471bf0Spatrickdefm : Zn3WriteResXMM<WriteVecMaskedStore64, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [4, 1], 10>;
101673471bf0Spatrickdefm : Zn3WriteResYMM<WriteVecMaskedStore32Y, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [12, 1], 42>;
101773471bf0Spatrickdefm : Zn3WriteResYMM<WriteVecMaskedStore64Y, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [6, 1], 18>;
101873471bf0Spatrick
// Moves between vector registers and general-purpose registers. Both
// directions receive identical parameters and use Zn3FPLd01 only.
// NOTE(review): assuming the multiclass arguments are
// <write, resources, latency, resource cycles, micro-ops>, this is 1-cycle
// latency, two cycles on Zn3FPLd01 and one micro-op - confirm against the
// Zn3WriteResXMM definition earlier in the file.
101973471bf0Spatrickdefm : Zn3WriteResXMM<WriteVecMoveToGpr, [Zn3FPLd01], 1, [2], 1>;
102073471bf0Spatrickdefm : Zn3WriteResXMM<WriteVecMoveFromGpr, [Zn3FPLd01], 1, [2], 1>;
102173471bf0Spatrick
// Scheduling record for MMX_MOVQ2FR64rr and MMX_MOVQ2DQrr: two micro-ops with
// 1-cycle latency, holding Zn3FPLd01 for one cycle and Zn3FPFMisc0123 for two.
102273471bf0Spatrickdef Zn3WriteMOVMMX : SchedWriteRes<[Zn3FPLd01, Zn3FPFMisc0123]> {
102373471bf0Spatrick  let Latency = 1;
102473471bf0Spatrick  let ResourceCycles = [1, 2];
102573471bf0Spatrick  let NumMicroOps = 2;
102673471bf0Spatrick}
102773471bf0Spatrickdef : InstRW<[Zn3WriteMOVMMX], (instrs MMX_MOVQ2FR64rr, MMX_MOVQ2DQrr)>;
102873471bf0Spatrick
// Scheduling record for MMX_MOVD64rr and MMX_MOVD64to64rr. Same resources,
// latency and micro-op count as Zn3WriteMOVMMX, except that Zn3FPFMisc0123 is
// held for four cycles instead of two.
102973471bf0Spatrickdef Zn3WriteMOVMMXSlow : SchedWriteRes<[Zn3FPLd01, Zn3FPFMisc0123]> {
103073471bf0Spatrick  let Latency = 1;
103173471bf0Spatrick  let ResourceCycles = [1, 4];
103273471bf0Spatrick  let NumMicroOps = 2;
103373471bf0Spatrick}
103473471bf0Spatrickdef : InstRW<[Zn3WriteMOVMMXSlow], (instrs MMX_MOVD64rr, MMX_MOVD64to64rr)>;
103573471bf0Spatrick
103673471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteVecALU, [Zn3FPVAdd0123], 1, [1], 1>;  // Vector integer ALU op, no logicals.
103773471bf0Spatrick
// Scheduling record for EXTRQ and INSERTQ: one micro-op with 3-cycle latency,
// holding Zn3FPVShuf01 and Zn3FPLd01 for one cycle each.
103873471bf0Spatrickdef Zn3WriteEXTRQ_INSERTQ : SchedWriteRes<[Zn3FPVShuf01, Zn3FPLd01]> {
103973471bf0Spatrick  let Latency = 3;
104073471bf0Spatrick  let ResourceCycles = [1, 1];
104173471bf0Spatrick  let NumMicroOps = 1;
104273471bf0Spatrick}
104373471bf0Spatrickdef : InstRW<[Zn3WriteEXTRQ_INSERTQ], (instrs EXTRQ, INSERTQ)>;
104473471bf0Spatrick
// Scheduling record for EXTRQI and INSERTQI. Same resources, resource cycles
// and latency as Zn3WriteEXTRQ_INSERTQ, but modeled as two micro-ops.
104573471bf0Spatrickdef Zn3WriteEXTRQI_INSERTQI : SchedWriteRes<[Zn3FPVShuf01, Zn3FPLd01]> {
104673471bf0Spatrick  let Latency = 3;
104773471bf0Spatrick  let ResourceCycles = [1, 1];
104873471bf0Spatrick  let NumMicroOps = 2;
104973471bf0Spatrick}
105073471bf0Spatrickdef : InstRW<[Zn3WriteEXTRQI_INSERTQI], (instrs EXTRQI, INSERTQI)>;
105173471bf0Spatrick
105273471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteVecALUX, [Zn3FPVAdd0123], 1, [1], 1>; // Vector integer ALU op, no logicals (XMM).
105373471bf0Spatrick
// Per-instruction override for the XMM register-register vector integer ALU
// instructions listed below (abs, saturating add/sub, average, sign, and
// VPCMPEQQ). It uses the narrower Zn3FPVAdd01 resource rather than the
// Zn3FPVAdd0123 resource given to the generic WriteVecALUX class; latency
// (1 cycle), resource cycles and micro-op count are 1 each.
105473471bf0Spatrickdef Zn3WriteVecALUXSlow : SchedWriteRes<[Zn3FPVAdd01]> {
105573471bf0Spatrick  let Latency = 1;
105673471bf0Spatrick  let ResourceCycles = [1];
105773471bf0Spatrick  let NumMicroOps = 1;
105873471bf0Spatrick}
105973471bf0Spatrickdef : InstRW<[Zn3WriteVecALUXSlow], (instrs PABSBrr, PABSDrr, PABSWrr,
106073471bf0Spatrick                                            PADDSBrr, PADDSWrr, PADDUSBrr, PADDUSWrr,
106173471bf0Spatrick                                            PAVGBrr, PAVGWrr,
106273471bf0Spatrick                                            PSIGNBrr, PSIGNDrr, PSIGNWrr,
106373471bf0Spatrick                                            VPABSBrr, VPABSDrr, VPABSWrr,
106473471bf0Spatrick                                            VPADDSBrr, VPADDSWrr, VPADDUSBrr, VPADDUSWrr,
106573471bf0Spatrick                                            VPAVGBrr, VPAVGWrr,
106673471bf0Spatrick                                            VPCMPEQQrr,
106773471bf0Spatrick                                            VPSIGNBrr, VPSIGNDrr, VPSIGNWrr,
106873471bf0Spatrick                                            PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr, VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr)>;
106973471bf0Spatrick
// MMX counterpart of Zn3WriteVecALUXSlow: identical field values (Zn3FPVAdd01
// for one cycle, 1-cycle latency, one micro-op), bound to the MMX
// register-register abs, sign, saturating add/sub and average instructions.
107073471bf0Spatrickdef Zn3WriteVecALUXMMX : SchedWriteRes<[Zn3FPVAdd01]> {
107173471bf0Spatrick  let Latency = 1;
107273471bf0Spatrick  let ResourceCycles = [1];
107373471bf0Spatrick  let NumMicroOps = 1;
107473471bf0Spatrick}
107573471bf0Spatrickdef : InstRW<[Zn3WriteVecALUXMMX], (instrs MMX_PABSBrr, MMX_PABSDrr, MMX_PABSWrr,
107673471bf0Spatrick                                           MMX_PSIGNBrr, MMX_PSIGNDrr, MMX_PSIGNWrr,
1077*d415bd75Srobert                                           MMX_PADDSBrr, MMX_PADDSWrr, MMX_PADDUSBrr, MMX_PADDUSWrr,
1078*d415bd75Srobert                                           MMX_PAVGBrr, MMX_PAVGWrr,
1079*d415bd75Srobert                                           MMX_PSUBSBrr, MMX_PSUBSWrr, MMX_PSUBUSBrr, MMX_PSUBUSWrr)>;
108073471bf0Spatrick
108173471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteVecALUY, [Zn3FPVAdd0123], 1, [1], 1>; // Vector integer ALU op, no logicals (YMM).
108273471bf0Spatrick
// YMM counterpart of Zn3WriteVecALUXSlow: the listed 256-bit
// register-register instructions use Zn3FPVAdd01 (rather than the
// Zn3FPVAdd0123 resource of the generic WriteVecALUY class) for one cycle,
// with 1-cycle latency and one micro-op.
108373471bf0Spatrickdef Zn3WriteVecALUYSlow : SchedWriteRes<[Zn3FPVAdd01]> {
108473471bf0Spatrick  let Latency = 1;
108573471bf0Spatrick  let ResourceCycles = [1];
108673471bf0Spatrick  let NumMicroOps = 1;
108773471bf0Spatrick}
108873471bf0Spatrickdef : InstRW<[Zn3WriteVecALUYSlow], (instrs VPABSBYrr, VPABSDYrr, VPABSWYrr,
108973471bf0Spatrick                                            VPADDSBYrr, VPADDSWYrr, VPADDUSBYrr, VPADDUSWYrr,
109073471bf0Spatrick                                            VPSUBSBYrr, VPSUBSWYrr, VPSUBUSBYrr, VPSUBUSWYrr,
109173471bf0Spatrick                                            VPAVGBYrr, VPAVGWYrr,
109273471bf0Spatrick                                            VPCMPEQQYrr,
109373471bf0Spatrick                                            VPSIGNBYrr, VPSIGNDYrr, VPSIGNWYrr)>;
109473471bf0Spatrick
109573471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteVecALUZ>; // Vector integer ALU op, no logicals (ZMM).
109673471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteVecLogic, [Zn3FPVMisc0123], 1, [1], 1>;  // Vector integer and/or/xor logicals.
109773471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteVecLogicX, [Zn3FPVMisc0123], 1, [1], 1>; // Vector integer and/or/xor logicals (XMM).
109873471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteVecLogicY, [Zn3FPVMisc0123], 1, [1], 1>; // Vector integer and/or/xor logicals (YMM).
109973471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteVecLogicZ>; // Vector integer and/or/xor logicals (ZMM).
110073471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteVecTest, [Zn3FPVAdd12, Zn3FPSt], 1, [1, 1], 2>;  // FIXME: latency not from llvm-exegesis // Vector integer TEST instructions.
110173471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteVecTestY, [Zn3FPVAdd12, Zn3FPSt], 1, [1, 1], 2>; // FIXME: latency not from llvm-exegesis  // Vector integer TEST instructions (YMM).
110273471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteVecTestZ>;  // Vector integer TEST instructions (ZMM).
110373471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteVecShift, [Zn3FPVShift01], 1, [1], 1>;  // Vector integer shifts (default).
110473471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteVecShiftX, [Zn3FPVShift01], 1, [1], 1>; // Vector integer shifts (XMM).
110573471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteVecShiftY, [Zn3FPVShift01], 1, [1], 1>; // Vector integer shifts (YMM).
110673471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteVecShiftZ>; // Vector integer shifts (ZMM).
110773471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteVecShiftImm, [Zn3FPVShift01], 1, [1], 1>;  // Vector integer immediate shifts (default).
110873471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteVecShiftImmX, [Zn3FPVShift01], 1, [1], 1>; // Vector integer immediate shifts (XMM).
110973471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteVecShiftImmY, [Zn3FPVShift01], 1, [1], 1>; // Vector integer immediate shifts (YMM).
111073471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteVecShiftImmZ>; // Vector integer immediate shifts (ZMM).
111173471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteVecIMul, [Zn3FPVMul01], 3, [1], 1>;  // Vector integer multiply (default).
111273471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteVecIMulX, [Zn3FPVMul01], 3, [1], 1>; // Vector integer multiply (XMM).
111373471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteVecIMulY, [Zn3FPVMul01], 3, [1], 1>; // Vector integer multiply (YMM).
111473471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteVecIMulZ>; // Vector integer multiply (ZMM).
111573471bf0Spatrickdefm : Zn3WriteResXMMPair<WritePMULLD, [Zn3FPVMul01], 3, [1], 1>; // Vector PMULLD.
111673471bf0Spatrickdefm : Zn3WriteResYMMPair<WritePMULLDY, [Zn3FPVMul01], 3, [1], 1>; // Vector PMULLD (YMM).
111773471bf0Spatrickdefm : X86WriteResPairUnsupported<WritePMULLDZ>; // Vector PMULLD (ZMM).
111873471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteShuffle, [Zn3FPVShuf01], 1, [1], 1>;  // Vector shuffles.
111973471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteShuffleX, [Zn3FPVShuf01], 1, [1], 1>; // Vector shuffles (XMM).
112073471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteShuffleY, [Zn3FPVShuf01], 1, [1], 1>; // Vector shuffles (YMM).
112173471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteShuffleZ>; // Vector shuffles (ZMM).
112273471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteVarShuffle, [Zn3FPVShuf01], 1, [1], 1>;  // Vector variable shuffles.
112373471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteVarShuffleX, [Zn3FPVShuf01], 1, [1], 1>; // Vector variable shuffles (XMM).
112473471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteVarShuffleY, [Zn3FPVShuf01], 1, [1], 1>; // Vector variable shuffles (YMM).
112573471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteVarShuffleZ>; // Vector variable shuffles (ZMM).
112673471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteBlend, [Zn3FPVMisc0123], 1, [1], 1>; // Vector blends.
112773471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteBlendY, [Zn3FPVMisc0123], 1, [1], 1>; // Vector blends (YMM).
112873471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteBlendZ>; // Vector blends (ZMM).
112973471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteVarBlend, [Zn3FPVMul01], 1, [1], 1>; // Vector variable blends.
113073471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteVarBlendY, [Zn3FPVMul01], 1, [1], 1>; // Vector variable blends (YMM).
113173471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteVarBlendZ>; // Vector variable blends (ZMM).
113273471bf0Spatrickdefm : Zn3WriteResXMMPair<WritePSADBW, [Zn3FPVAdd0123], 3, [2], 1>;  // Vector PSADBW.
113373471bf0Spatrickdefm : Zn3WriteResXMMPair<WritePSADBWX, [Zn3FPVAdd0123], 3, [2], 1>; // Vector PSADBW (XMM).
113473471bf0Spatrickdefm : Zn3WriteResYMMPair<WritePSADBWY, [Zn3FPVAdd0123], 3, [2], 1>; // Vector PSADBW (YMM).
113573471bf0Spatrickdefm : X86WriteResPairUnsupported<WritePSADBWZ>; // Vector PSADBW (ZMM).
113673471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteMPSAD, [Zn3FPVAdd0123], 4, [8], 4, /*LoadUOps=*/2>; // Vector MPSAD.
113773471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteMPSADY, [Zn3FPVAdd0123], 4, [8], 3, /*LoadUOps=*/1>; // Vector MPSAD (YMM).
113873471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteMPSADZ>; // Vector MPSAD (ZMM).
113973471bf0Spatrickdefm : Zn3WriteResXMMPair<WritePHMINPOS, [Zn3FPVAdd01], 3, [1], 1>;  // Vector PHMINPOS.
114073471bf0Spatrick
114173471bf0Spatrick// Vector insert/extract operations.
114273471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteVecInsert, [Zn3FPLd01], 1, [2], 2, /*LoadUOps=*/-1>; // Insert gpr to vector element.
114373471bf0Spatrickdefm : Zn3WriteResXMM<WriteVecExtract, [Zn3FPLd01], 1, [2], 2>; // Extract vector element to gpr.
114473471bf0Spatrickdefm : Zn3WriteResXMM<WriteVecExtractSt, [Zn3FPSt, Zn3Store], !add(1, Znver3Model.StoreLatency), [1, 1], 2>; // Extract vector element and store.
114573471bf0Spatrick
114673471bf0Spatrick// MOVMSK operations.
114773471bf0Spatrickdefm : Zn3WriteResXMM<WriteFMOVMSK, [Zn3FPVMisc2], 1, [1], 1>;
114873471bf0Spatrickdefm : Zn3WriteResXMM<WriteVecMOVMSK, [Zn3FPVMisc2], 1, [1], 1>;
114973471bf0Spatrickdefm : Zn3WriteResYMM<WriteVecMOVMSKY, [Zn3FPVMisc2], 1, [1], 1>;
115073471bf0Spatrickdefm : Zn3WriteResXMM<WriteMMXMOVMSK, [Zn3FPVMisc2], 1, [1], 1>;
115173471bf0Spatrick
115273471bf0Spatrick// Conversion between integer and float.
115373471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteCvtSD2I, [Zn3FPFCvt01], 2, [2], 2>;  // Double -> Integer.
115473471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteCvtPD2I, [Zn3FPFCvt01], 3, [1], 1>; // Double -> Integer (XMM).
115573471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteCvtPD2IY, [Zn3FPFCvt01], 6, [2], 2>; // Double -> Integer (YMM).
115673471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteCvtPD2IZ>; // Double -> Integer (ZMM).
115773471bf0Spatrick
// Scheduling record for the MMX double -> integer conversions
// (MMX_CVTPD2PI* / MMX_CVTTPD2PI*): two micro-ops, 1-cycle latency,
// Zn3FPFCvt01 held for two cycles.
// NOTE(review): the memory-operand (rm) forms share this record with the
// register (rr) forms, so no load resources or load latency are modeled for
// them - confirm this is intentional.
115873471bf0Spatrickdef Zn3WriteCvtPD2IMMX : SchedWriteRes<[Zn3FPFCvt01]> {
115973471bf0Spatrick  let Latency = 1;
116073471bf0Spatrick  let ResourceCycles = [2];
116173471bf0Spatrick  let NumMicroOps = 2;
116273471bf0Spatrick}
1163*d415bd75Srobertdef : InstRW<[Zn3WriteCvtPD2IMMX], (instrs MMX_CVTPD2PIrm, MMX_CVTTPD2PIrm, MMX_CVTPD2PIrr, MMX_CVTTPD2PIrr)>;
116473471bf0Spatrick
116573471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteCvtSS2I, [Zn3FPFCvt01], 2, [2], 2>;  // Float -> Integer.
116673471bf0Spatrick
116773471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteCvtPS2I, [Zn3FPFCvt01], 3, [1], 1>; // Float -> Integer (XMM).
116873471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteCvtPS2IY, [Zn3FPFCvt01], 3, [1], 1>; // Float -> Integer (YMM).
116973471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteCvtPS2IZ>; // Float -> Integer (ZMM).
117073471bf0Spatrick
117173471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteCvtI2SD, [Zn3FPFCvt01], 3, [2], 2, /*LoadUOps=*/-1>;  // Integer -> Double.
117273471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteCvtI2PD, [Zn3FPFCvt01], 3, [1], 1>; // Integer -> Double (XMM).
117373471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteCvtI2PDY, [Zn3FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // Integer -> Double (YMM).
117473471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteCvtI2PDZ>; // Integer -> Double (ZMM).
117573471bf0Spatrick
// Scheduling record for the MMX integer -> double conversion
// (MMX_CVTPI2PDrm / MMX_CVTPI2PDrr): two micro-ops, 2-cycle latency,
// Zn3FPFCvt01 held for six cycles.
// NOTE(review): the rm form shares this record with the rr form, so no load
// resources or load latency are modeled for it - confirm this is intentional.
117673471bf0Spatrickdef Zn3WriteCvtI2PDMMX : SchedWriteRes<[Zn3FPFCvt01]> {
117773471bf0Spatrick  let Latency = 2;
117873471bf0Spatrick  let ResourceCycles = [6];
117973471bf0Spatrick  let NumMicroOps = 2;
118073471bf0Spatrick}
1181*d415bd75Srobertdef : InstRW<[Zn3WriteCvtI2PDMMX], (instrs MMX_CVTPI2PDrm, MMX_CVTPI2PDrr)>;
118273471bf0Spatrick
118373471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteCvtI2SS, [Zn3FPFCvt01], 3, [2], 2, /*LoadUOps=*/-1>;  // Integer -> Float.
118473471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteCvtI2PS, [Zn3FPFCvt01], 3, [1], 1>; // Integer -> Float (XMM).
118573471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteCvtI2PSY, [Zn3FPFCvt01], 3, [1], 1>; // Integer -> Float (YMM).
118673471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteCvtI2PSZ>; // Integer -> Float (ZMM).
118773471bf0Spatrick
// Scheduling record for MMX_CVTPI2PSrr (register form only): two micro-ops,
// 3-cycle latency, Zn3FPFCvt01 held for one cycle.
118873471bf0Spatrickdef Zn3WriteCvtI2PSMMX : SchedWriteRes<[Zn3FPFCvt01]> {
118973471bf0Spatrick  let Latency = 3;
119073471bf0Spatrick  let ResourceCycles = [1];
119173471bf0Spatrick  let NumMicroOps = 2;
119273471bf0Spatrick}
1193*d415bd75Srobertdef : InstRW<[Zn3WriteCvtI2PSMMX], (instrs MMX_CVTPI2PSrr)>;
119473471bf0Spatrick
119573471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteCvtSS2SD, [Zn3FPFCvt01], 3, [1], 1>;  // Float -> Double size conversion.
119673471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteCvtPS2PD, [Zn3FPFCvt01], 3, [1], 1>; // Float -> Double size conversion (XMM).
119773471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteCvtPS2PDY, [Zn3FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // Float -> Double size conversion (YMM).
119873471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>; // Float -> Double size conversion (ZMM).
119973471bf0Spatrick
120073471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteCvtSD2SS, [Zn3FPFCvt01], 3, [1], 1>;  // Double -> Float size conversion.
120173471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteCvtPD2PS, [Zn3FPFCvt01], 3, [1], 1>; // Double -> Float size conversion (XMM).
120273471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteCvtPD2PSY, [Zn3FPFCvt01], 6, [2], 2>; // Double -> Float size conversion (YMM).
120373471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>; // Double -> Float size conversion (ZMM).
120473471bf0Spatrick
120573471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteCvtPH2PS, [Zn3FPFCvt01], 3, [1], 1>; // Half -> Float size conversion.
120673471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteCvtPH2PSY, [Zn3FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // Half -> Float size conversion (YMM).
120773471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>; // Half -> Float size conversion (ZMM).
120873471bf0Spatrick
120973471bf0Spatrickdefm : Zn3WriteResXMM<WriteCvtPS2PH, [Zn3FPFCvt01], 3, [2], 1>; // Float -> Half size conversion.
121073471bf0Spatrickdefm : Zn3WriteResYMM<WriteCvtPS2PHY, [Zn3FPFCvt01], 6, [2], 2>; // Float -> Half size conversion (YMM).
121173471bf0Spatrickdefm : X86WriteResUnsupported<WriteCvtPS2PHZ>; // Float -> Half size conversion (ZMM).
121273471bf0Spatrickdefm : Zn3WriteResXMM<WriteCvtPS2PHSt, [Zn3FPFCvt01, Zn3FPSt, Zn3Store], !add(3, Znver3Model.StoreLatency), [1, 1, 1], 2>; // Float -> Half + store size conversion.
121373471bf0Spatrickdefm : Zn3WriteResYMM<WriteCvtPS2PHYSt, [Zn3FPFCvt01, Zn3FPSt, Zn3Store], !add(6, Znver3Model.StoreLatency), [2, 1, 1], 3>; // Float -> Half + store size conversion (YMM).
121473471bf0Spatrickdefm : X86WriteResUnsupported<WriteCvtPS2PHZSt>; // Float -> Half + store size conversion (ZMM).
121573471bf0Spatrick
121673471bf0Spatrick// CRC32 instruction.
121773471bf0Spatrickdefm : Zn3WriteResIntPair<WriteCRC32, [Zn3ALU1], 3, [1], 1>;
121873471bf0Spatrick
// Scheduling record for SHA1MSG1rr: two micro-ops, 2-cycle latency,
// Zn3FPU0123 held for two cycles. The SHA1MSG1rm record is derived from it.
121973471bf0Spatrickdef Zn3WriteSHA1MSG1rr : SchedWriteRes<[Zn3FPU0123]> {
122073471bf0Spatrick  let Latency = 2;
122173471bf0Spatrick  let ResourceCycles = [2];
122273471bf0Spatrick  let NumMicroOps = 2;
122373471bf0Spatrick}
122473471bf0Spatrickdef : InstRW<[Zn3WriteSHA1MSG1rr], (instrs SHA1MSG1rr)>;
122573471bf0Spatrick
// Scheduling record for SHA1MSG1rm (memory source). Adds Zn3AGU012 and Zn3Load
// for one cycle each on top of the two Zn3FPU0123 cycles of the register form.
// Latency is the model's load latency plus the register-form latency; the
// micro-op count equals the register form's (the `!add(..., 0)`).
122673471bf0Spatrickdef Zn3WriteSHA1MSG1rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
122773471bf0Spatrick  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA1MSG1rr.Latency);
122873471bf0Spatrick  let ResourceCycles = [1, 1, 2];
122973471bf0Spatrick  let NumMicroOps = !add(Zn3WriteSHA1MSG1rr.NumMicroOps, 0);
123073471bf0Spatrick}
123173471bf0Spatrickdef : InstRW<[Zn3WriteSHA1MSG1rm], (instrs SHA1MSG1rm)>;
123273471bf0Spatrick
// Scheduling record shared by SHA1MSG2rr and SHA1NEXTErr: one micro-op,
// 1-cycle latency, Zn3FPU0123 held for two cycles.
123373471bf0Spatrickdef Zn3WriteSHA1MSG2rr_SHA1NEXTErr : SchedWriteRes<[Zn3FPU0123]> {
123473471bf0Spatrick  let Latency = 1;
123573471bf0Spatrick  let ResourceCycles = [2];
123673471bf0Spatrick  let NumMicroOps = 1;
123773471bf0Spatrick}
123873471bf0Spatrickdef : InstRW<[Zn3WriteSHA1MSG2rr_SHA1NEXTErr], (instrs SHA1MSG2rr, SHA1NEXTErr)>;
123973471bf0Spatrick
// Memory-source counterpart of Zn3WriteSHA1MSG2rr_SHA1NEXTErr, bound to
// SHA1MSG2rm and SHA1NEXTErm. Adds Zn3AGU012 and Zn3Load for one cycle each;
// latency is the model's load latency plus the register-form latency, and the
// micro-op count equals the register form's.
124073471bf0Spatrickdef Zn3Writerm_SHA1MSG2rm_SHA1NEXTErm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
124173471bf0Spatrick  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
124273471bf0Spatrick  let ResourceCycles = [1, 1, 2];
124373471bf0Spatrick  let NumMicroOps = !add(Zn3WriteSHA1MSG2rr_SHA1NEXTErr.NumMicroOps, 0);
124473471bf0Spatrick}
124573471bf0Spatrickdef : InstRW<[Zn3Writerm_SHA1MSG2rm_SHA1NEXTErm], (instrs SHA1MSG2rm, SHA1NEXTErm)>;
124673471bf0Spatrick
// Scheduling record for SHA256MSG1rr: two micro-ops, 2-cycle latency,
// Zn3FPU0123 held for three cycles.
124773471bf0Spatrickdef Zn3WriteSHA256MSG1rr : SchedWriteRes<[Zn3FPU0123]> {
124873471bf0Spatrick  let Latency = 2;
124973471bf0Spatrick  let ResourceCycles = [3];
125073471bf0Spatrick  let NumMicroOps = 2;
125173471bf0Spatrick}
125273471bf0Spatrickdef : InstRW<[Zn3WriteSHA256MSG1rr], (instrs SHA256MSG1rr)>;
125373471bf0Spatrick
// Memory-source counterpart of Zn3WriteSHA256MSG1rr, bound to SHA256MSG1rm.
// Adds Zn3AGU012 and Zn3Load for one cycle each; latency is the model's load
// latency plus the register-form latency, and the micro-op count equals the
// register form's.
125473471bf0Spatrickdef Zn3Writerm_SHA256MSG1rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
125573471bf0Spatrick  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA256MSG1rr.Latency);
125673471bf0Spatrick  let ResourceCycles = [1, 1, 3];
125773471bf0Spatrick  let NumMicroOps = !add(Zn3WriteSHA256MSG1rr.NumMicroOps, 0);
125873471bf0Spatrick}
125973471bf0Spatrickdef : InstRW<[Zn3Writerm_SHA256MSG1rm], (instrs SHA256MSG1rm)>;
126073471bf0Spatrick
// Scheduling record for SHA256MSG2rr: four micro-ops, 3-cycle latency,
// Zn3FPU0123 held for eight cycles.
126173471bf0Spatrickdef Zn3WriteSHA256MSG2rr : SchedWriteRes<[Zn3FPU0123]> {
126273471bf0Spatrick  let Latency = 3;
126373471bf0Spatrick  let ResourceCycles = [8];
126473471bf0Spatrick  let NumMicroOps = 4;
126573471bf0Spatrick}
126673471bf0Spatrickdef : InstRW<[Zn3WriteSHA256MSG2rr], (instrs SHA256MSG2rr)>;
126773471bf0Spatrick
// Scheduling record for SHA256MSG2rm (memory source). Adds Zn3AGU012 and
// Zn3Load for one cycle each; latency is the model's load latency plus the
// register-form latency. Unlike the other SHA memory forms in this group, the
// micro-op count is one more than the register form's.
126873471bf0Spatrickdef Zn3WriteSHA256MSG2rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
126973471bf0Spatrick  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA256MSG2rr.Latency);
127073471bf0Spatrick  let ResourceCycles = [1, 1, 8];
127173471bf0Spatrick  let NumMicroOps = !add(Zn3WriteSHA256MSG2rr.NumMicroOps, 1);
127273471bf0Spatrick}
127373471bf0Spatrickdef : InstRW<[Zn3WriteSHA256MSG2rm], (instrs SHA256MSG2rm)>;
127473471bf0Spatrick
// Scheduling record for SHA1RNDS4rri (register form only): one micro-op,
// 6-cycle latency, Zn3FPU0123 held for eight cycles.
127573471bf0Spatrickdef Zn3WriteSHA1RNDS4rri : SchedWriteRes<[Zn3FPU0123]> {
127673471bf0Spatrick  let Latency = 6;
127773471bf0Spatrick  let ResourceCycles = [8];
127873471bf0Spatrick  let NumMicroOps = 1;
127973471bf0Spatrick}
128073471bf0Spatrickdef : InstRW<[Zn3WriteSHA1RNDS4rri], (instrs SHA1RNDS4rri)>;
128173471bf0Spatrick
// Scheduling record for SHA256RNDS2rr (register form only): one micro-op,
// 4-cycle latency, Zn3FPU0123 held for eight cycles.
128273471bf0Spatrickdef Zn3WriteSHA256RNDS2rr : SchedWriteRes<[Zn3FPU0123]> {
128373471bf0Spatrick  let Latency = 4;
128473471bf0Spatrick  let ResourceCycles = [8];
128573471bf0Spatrick  let NumMicroOps = 1;
128673471bf0Spatrick}
128773471bf0Spatrickdef : InstRW<[Zn3WriteSHA256RNDS2rr], (instrs SHA256RNDS2rr)>;
128873471bf0Spatrick
128973471bf0Spatrick// String instructions.
129073471bf0Spatrick// Packed Compare Implicit Length Strings, Return Mask
129173471bf0Spatrickdefm : Zn3WriteResXMMPair<WritePCmpIStrM, [Zn3FPVAdd0123], 6, [8], 3, /*LoadUOps=*/1>;
129273471bf0Spatrick// Packed Compare Explicit Length Strings, Return Mask
129373471bf0Spatrickdefm : Zn3WriteResXMMPair<WritePCmpEStrM, [Zn3FPVAdd0123], 6, [12], 7, /*LoadUOps=*/5>;
129473471bf0Spatrick// Packed Compare Implicit Length Strings, Return Index
129573471bf0Spatrickdefm : Zn3WriteResXMMPair<WritePCmpIStrI, [Zn3FPVAdd0123], 2, [8], 4>;
129673471bf0Spatrick// Packed Compare Explicit Length Strings, Return Index
129773471bf0Spatrickdefm : Zn3WriteResXMMPair<WritePCmpEStrI, [Zn3FPVAdd0123], 6, [12], 8, /*LoadUOps=*/4>;
129873471bf0Spatrick
129973471bf0Spatrick// AES instructions.
130073471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteAESDecEnc, [Zn3FPAES01], 4, [1], 1>; // Decryption, encryption.
130173471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteAESIMC, [Zn3FPAES01], 4, [1], 1>; // InvMixColumn.
130273471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteAESKeyGen, [Zn3FPAES01], 4, [1], 1>; // Key Generation.
130373471bf0Spatrick
130473471bf0Spatrick// Carry-less multiplication instructions.
130573471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteCLMul, [Zn3FPCLM01], 4, [4], 4>;
130673471bf0Spatrick
130773471bf0Spatrick// EMMS/FEMMS
130873471bf0Spatrickdefm : Zn3WriteResInt<WriteEMMS, [Zn3ALU0123], 2, [1], 1>; // FIXME: latency not from llvm-exegesis
130973471bf0Spatrick
131073471bf0Spatrick// Load/store MXCSR
131173471bf0Spatrickdefm : Zn3WriteResInt<WriteLDMXCSR, [Zn3AGU012, Zn3Load, Zn3ALU0123], !add(Znver3Model.LoadLatency, 1), [1, 1, 6], 1>; // FIXME: latency not from llvm-exegesis
131273471bf0Spatrickdefm : Zn3WriteResInt<WriteSTMXCSR, [Zn3ALU0123, Zn3AGU012, Zn3Store], !add(1, Znver3Model.StoreLatency), [60, 1, 1], 2>; // FIXME: latency not from llvm-exegesis
131373471bf0Spatrick
131473471bf0Spatrick// Catch-all for expensive system instructions.
131573471bf0Spatrickdefm : Zn3WriteResInt<WriteSystem, [Zn3ALU0123], 100, [100], 100>;
131673471bf0Spatrick
// Scheduling record for VZEROUPPER: one micro-op holding Zn3FPU0123 for one
// cycle. The latency is set to zero and, per the FIXME, is not a measured
// value.
131773471bf0Spatrickdef Zn3WriteVZEROUPPER : SchedWriteRes<[Zn3FPU0123]> {
131873471bf0Spatrick  let Latency = 0; // FIXME: not from llvm-exegesis
131973471bf0Spatrick  let ResourceCycles = [1];
132073471bf0Spatrick  let NumMicroOps = 1;
132173471bf0Spatrick}
132273471bf0Spatrickdef : InstRW<[Zn3WriteVZEROUPPER], (instrs VZEROUPPER)>;
132373471bf0Spatrick
// Scheduling record for VZEROALL: 18 micro-ops holding Zn3FPU0123 for 24
// cycles. The 10-cycle latency is, per the FIXME, not a measured value.
132473471bf0Spatrickdef Zn3WriteVZEROALL : SchedWriteRes<[Zn3FPU0123]> {
132573471bf0Spatrick  let Latency = 10; // FIXME: not from llvm-exegesis
132673471bf0Spatrick  let ResourceCycles = [24];
132773471bf0Spatrick  let NumMicroOps = 18;
132873471bf0Spatrick}
132973471bf0Spatrickdef : InstRW<[Zn3WriteVZEROALL], (instrs VZEROALL)>;
133073471bf0Spatrick
133173471bf0Spatrick// AVX2.
133273471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteFShuffle256, [Zn3FPVShuf], 2, [1], 1, /*LoadUOps=*/2>; // Fp 256-bit width vector shuffles.
133373471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteFVarShuffle256, [Zn3FPVShuf], 7, [1], 2, /*LoadUOps=*/1>; // Fp 256-bit width variable shuffles.
133473471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteShuffle256, [Zn3FPVShuf], 2, [1], 1>; // 256-bit width vector shuffles.
133573471bf0Spatrick
// Scheduling record for VPERM2I128rr and VPERM2F128rr: one micro-op, 3-cycle
// latency, Zn3FPVShuf held for one cycle. The VPERM2F128rm record is derived
// from it.
133673471bf0Spatrickdef Zn3WriteVPERM2I128rr_VPERM2F128rr : SchedWriteRes<[Zn3FPVShuf]> {
133773471bf0Spatrick  let Latency = 3;
133873471bf0Spatrick  let ResourceCycles = [1];
133973471bf0Spatrick  let NumMicroOps = 1;
134073471bf0Spatrick}
134173471bf0Spatrickdef : InstRW<[Zn3WriteVPERM2I128rr_VPERM2F128rr], (instrs VPERM2I128rr, VPERM2F128rr)>;
134273471bf0Spatrick
// Scheduling record for VPERM2F128rm (memory source). Holds Zn3AGU012, Zn3Load
// and Zn3FPVShuf for one cycle each; latency is the model's load latency plus
// the register-form latency, and the micro-op count equals the register
// form's.
// NOTE(review): only VPERM2F128rm is bound here; VPERM2I128rm is presumably
// handled elsewhere - confirm.
134373471bf0Spatrickdef Zn3WriteVPERM2F128rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
134473471bf0Spatrick  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVPERM2I128rr_VPERM2F128rr.Latency);
134573471bf0Spatrick  let ResourceCycles = [1, 1, 1];
134673471bf0Spatrick  let NumMicroOps = !add(Zn3WriteVPERM2I128rr_VPERM2F128rr.NumMicroOps, 0);
134773471bf0Spatrick}
134873471bf0Spatrickdef : InstRW<[Zn3WriteVPERM2F128rm], (instrs VPERM2F128rm)>;
134973471bf0Spatrick
// Scheduling record for VPERMPSYrm (memory source): three micro-ops, holding
// Zn3AGU012 and Zn3Load for one cycle each and Zn3FPVShuf for two cycles.
// Latency is the model's load latency plus a literal 7 cycles (the value is
// written inline rather than taken from a register-form record).
135073471bf0Spatrickdef Zn3WriteVPERMPSYrm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
1351*d415bd75Srobert  let Latency = !add(Znver3Model.LoadLatency, 7);
135273471bf0Spatrick  let ResourceCycles = [1, 1, 2];
1353*d415bd75Srobert  let NumMicroOps = 3;
135473471bf0Spatrick}
135573471bf0Spatrickdef : InstRW<[Zn3WriteVPERMPSYrm], (instrs VPERMPSYrm)>;
135673471bf0Spatrick
// Scheduling record for the register/immediate forms VPERMPDYri and
// VPERMQYri: two micro-ops, 6-cycle latency, Zn3FPVShuf held for one cycle.
// The VPERMPDYmi record is derived from it.
135773471bf0Spatrickdef Zn3WriteVPERMYri : SchedWriteRes<[Zn3FPVShuf]> {
135873471bf0Spatrick  let Latency = 6;
135973471bf0Spatrick  let ResourceCycles = [1];
136073471bf0Spatrick  let NumMicroOps = 2;
136173471bf0Spatrick}
136273471bf0Spatrickdef : InstRW<[Zn3WriteVPERMYri], (instrs VPERMPDYri, VPERMQYri)>;
136373471bf0Spatrick
// Scheduling record for VPERMPDYmi (memory source). Holds Zn3AGU012 and
// Zn3Load for one cycle each and Zn3FPVShuf for two cycles. Latency is the
// model's load latency plus the Zn3WriteVPERMYri latency; the micro-op count
// is one more than Zn3WriteVPERMYri's.
136473471bf0Spatrickdef Zn3WriteVPERMPDYmi : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
136573471bf0Spatrick  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVPERMYri.Latency);
136673471bf0Spatrick  let ResourceCycles = [1, 1, 2];
136773471bf0Spatrick  let NumMicroOps = !add(Zn3WriteVPERMYri.NumMicroOps, 1);
136873471bf0Spatrick}
136973471bf0Spatrickdef : InstRW<[Zn3WriteVPERMPDYmi], (instrs VPERMPDYmi)>;
137073471bf0Spatrick
// Scheduling record for the memory-source forms VPERMQYmi and VPERMDYrm: two
// micro-ops, holding Zn3AGU012 and Zn3Load for one cycle each and Zn3FPVShuf
// for two cycles. Latency is the model's load latency plus a literal 5 cycles.
1371*d415bd75Srobertdef Zn3WriteVPERMDYm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
1372*d415bd75Srobert  let Latency = !add(Znver3Model.LoadLatency, 5);
1373*d415bd75Srobert  let ResourceCycles = [1, 1, 2];
137473471bf0Spatrick  let NumMicroOps = 2;
137573471bf0Spatrick}
1376*d415bd75Srobertdef : InstRW<[Zn3WriteVPERMDYm], (instrs VPERMQYmi, VPERMDYrm)>;
137773471bf0Spatrick
137873471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteVPMOV256, [Zn3FPVShuf01], 4, [3], 2, /*LoadUOps=*/-1>; // 256-bit width packed vector width-changing move.
1379*d415bd75Srobertdefm : Zn3WriteResYMMPair<WriteVarShuffle256, [Zn3FPVShuf], 5, [1], 2, /*LoadUOps=*/1>; // 256-bit width vector variable shuffles.
138073471bf0Spatrickdefm : Zn3WriteResXMMPair<WriteVarVecShift, [Zn3FPVShift01], 1, [1], 1>; // Variable vector shifts.
138173471bf0Spatrickdefm : Zn3WriteResYMMPair<WriteVarVecShiftY, [Zn3FPVShift01], 1, [1], 1>; // Variable vector shifts (YMM).
138273471bf0Spatrickdefm : X86WriteResPairUnsupported<WriteVarVecShiftZ>; // Variable vector shifts (ZMM).
138373471bf0Spatrick
138473471bf0Spatrick// Old microcoded instructions that nobody uses.
138573471bf0Spatrickdefm : Zn3WriteResInt<WriteMicrocoded, [Zn3ALU0123], 100, [100], 100>;
138673471bf0Spatrick
138773471bf0Spatrick// Fence instructions.
138873471bf0Spatrickdefm : Zn3WriteResInt<WriteFence, [Zn3ALU0123], 1, [100], 1>;
138973471bf0Spatrick
// Scheduling record for LFENCE, overriding the generic WriteFence class for
// this instruction: one micro-op, 1-cycle latency, Zn3LSU held for 30 cycles.
139073471bf0Spatrickdef Zn3WriteLFENCE : SchedWriteRes<[Zn3LSU]> {
139173471bf0Spatrick  let Latency = 1;
139273471bf0Spatrick  let ResourceCycles = [30];
139373471bf0Spatrick  let NumMicroOps = 1;
139473471bf0Spatrick}
139573471bf0Spatrickdef : InstRW<[Zn3WriteLFENCE], (instrs LFENCE)>;
139673471bf0Spatrick
// Scheduling record for SFENCE, overriding the generic WriteFence class for
// this instruction: one micro-op, 1-cycle latency, Zn3LSU held for one cycle.
139773471bf0Spatrickdef Zn3WriteSFENCE : SchedWriteRes<[Zn3LSU]> {
139873471bf0Spatrick  let Latency = 1;
139973471bf0Spatrick  let ResourceCycles = [1];
140073471bf0Spatrick  let NumMicroOps = 1;
140173471bf0Spatrick}
140273471bf0Spatrickdef : InstRW<[Zn3WriteSFENCE], (instrs SFENCE)>;
140373471bf0Spatrick
140473471bf0Spatrick// Nop, not very useful except it provides a model for nops!
140573471bf0Spatrickdefm : Zn3WriteResInt<WriteNop, [Zn3ALU0123], 0, [1], 1>; // FIXME: latency not from llvm-exegesis
140673471bf0Spatrick
140773471bf0Spatrick
140873471bf0Spatrick///////////////////////////////////////////////////////////////////////////////
140973471bf0Spatrick// Zero Cycle Move
141073471bf0Spatrick///////////////////////////////////////////////////////////////////////////////
141173471bf0Spatrick
// Write resource that consumes no processor resources and has zero latency,
// but still counts as one micro-op. It is applied below to the listed GPR
// register-to-register moves, and it is also the "idiom matched" variant of
// the SchedWriteVariant definitions later in this file.
141273471bf0Spatrickdef Zn3WriteZeroLatency : SchedWriteRes<[]> {
141373471bf0Spatrick  let Latency = 0;
141473471bf0Spatrick  let ResourceCycles = [];
141573471bf0Spatrick  let NumMicroOps = 1;
141673471bf0Spatrick}
141773471bf0Spatrickdef : InstRW<[Zn3WriteZeroLatency], (instrs MOV32rr, MOV32rr_REV,
141873471bf0Spatrick                                               MOV64rr, MOV64rr_REV,
141973471bf0Spatrick                                               MOVSX32rr32)>;
142073471bf0Spatrick
// Write resource for the 32/64-bit register XCHG forms listed below: no
// processor resources, zero latency, two micro-ops.
142173471bf0Spatrickdef Zn3WriteSwapRenameable : SchedWriteRes<[]> {
142273471bf0Spatrick  let Latency = 0;
142373471bf0Spatrick  let ResourceCycles = [];
142473471bf0Spatrick  let NumMicroOps = 2;
142573471bf0Spatrick}
142673471bf0Spatrickdef : InstRW<[Zn3WriteSwapRenameable], (instrs XCHG32rr, XCHG32ar,
142773471bf0Spatrick                                               XCHG64rr, XCHG64ar)>;
142873471bf0Spatrick
// Generic WriteXCHG class, bound to Zn3ALU0123. XCHG32rr/XCHG32ar/XCHG64rr/
// XCHG64ar are mapped to Zn3WriteSwapRenameable by an explicit InstRW instead.
142973471bf0Spatrickdefm : Zn3WriteResInt<WriteXCHG, [Zn3ALU0123], 0, [8], 2>;        // Compare+Exchange - TODO RMW support.
143073471bf0Spatrick
// Floating-point register move write classes. The X (XMM) and Y (YMM)
// variants are given an empty resource list and a 0 where WriteFMove has
// latency-like value 1; the Z variant is marked unsupported on this model.
143173471bf0Spatrickdefm : Zn3WriteResXMM<WriteFMove, [Zn3FPVMisc0123], 1, [1], 1>; // Empty sched class
143273471bf0Spatrickdefm : Zn3WriteResXMM<WriteFMoveX, [], 0, [], 1>;
143373471bf0Spatrickdefm : Zn3WriteResYMM<WriteFMoveY, [], 0, [], 1>;
1434*d415bd75Srobertdefm : X86WriteResUnsupported<WriteFMoveZ>;
143573471bf0Spatrick
// Vector-integer register move write classes. Only the MMX class
// (WriteVecMove) is bound to a resource group; the X and Y variants use an
// empty resource list, and the Z variant is marked unsupported on this model.
143673471bf0Spatrickdefm : Zn3WriteResXMM<WriteVecMove, [Zn3FPFMisc0123], 1, [1], 1>; // MMX
143773471bf0Spatrickdefm : Zn3WriteResXMM<WriteVecMoveX, [], 0, [], 1>;
143873471bf0Spatrickdefm : Zn3WriteResYMM<WriteVecMoveY, [], 0, [], 1>;
1439*d415bd75Srobertdefm : X86WriteResUnsupported<WriteVecMoveZ>;
144073471bf0Spatrick
// Declares the register-to-register moves that this model treats as
// optimizable register moves. All opcodes sit in a single equivalence class
// guarded by TruePred, i.e. the classification is unconditional.
// MMX moves are deliberately left out (see the note inside the list).
144173471bf0Spatrickdef : IsOptimizableRegisterMove<[
144273471bf0Spatrick  InstructionEquivalenceClass<[
144373471bf0Spatrick    // GPR variants.
144473471bf0Spatrick    MOV32rr, MOV32rr_REV,
144573471bf0Spatrick    MOV64rr, MOV64rr_REV,
144673471bf0Spatrick    MOVSX32rr32,
144773471bf0Spatrick    XCHG32rr, XCHG32ar,
144873471bf0Spatrick    XCHG64rr, XCHG64ar,
144973471bf0Spatrick
145073471bf0Spatrick    // MMX variants.
145173471bf0Spatrick    // MMX moves are *NOT* eliminated.
145273471bf0Spatrick
145373471bf0Spatrick    // SSE variants.
145473471bf0Spatrick    MOVAPSrr, MOVAPSrr_REV,
145573471bf0Spatrick    MOVUPSrr, MOVUPSrr_REV,
145673471bf0Spatrick    MOVAPDrr, MOVAPDrr_REV,
145773471bf0Spatrick    MOVUPDrr, MOVUPDrr_REV,
145873471bf0Spatrick    MOVDQArr, MOVDQArr_REV,
145973471bf0Spatrick    MOVDQUrr, MOVDQUrr_REV,
146073471bf0Spatrick
146173471bf0Spatrick    // AVX variants.
146273471bf0Spatrick    VMOVAPSrr, VMOVAPSrr_REV,
146373471bf0Spatrick    VMOVUPSrr, VMOVUPSrr_REV,
146473471bf0Spatrick    VMOVAPDrr, VMOVAPDrr_REV,
146573471bf0Spatrick    VMOVUPDrr, VMOVUPDrr_REV,
146673471bf0Spatrick    VMOVDQArr, VMOVDQArr_REV,
146773471bf0Spatrick    VMOVDQUrr, VMOVDQUrr_REV,
146873471bf0Spatrick
146973471bf0Spatrick    // AVX YMM variants.
147073471bf0Spatrick    VMOVAPSYrr, VMOVAPSYrr_REV,
147173471bf0Spatrick    VMOVUPSYrr, VMOVUPSYrr_REV,
147273471bf0Spatrick    VMOVAPDYrr, VMOVAPDYrr_REV,
147373471bf0Spatrick    VMOVUPDYrr, VMOVUPDYrr_REV,
147473471bf0Spatrick    VMOVDQAYrr, VMOVDQAYrr_REV,
147573471bf0Spatrick    VMOVDQUYrr, VMOVDQUYrr_REV,
147673471bf0Spatrick  ], TruePred >
147773471bf0Spatrick]>;
147873471bf0Spatrick
147973471bf0Spatrick///////////////////////////////////////////////////////////////////////////////
148073471bf0Spatrick// Dependency breaking instructions.
148173471bf0Spatrick///////////////////////////////////////////////////////////////////////////////
148273471bf0Spatrick
// Variant write for 32/64-bit register XOR and SUB: when ZeroIdiomPredicate
// matches the instruction it resolves to Zn3WriteZeroLatency, otherwise it
// falls back to the regular WriteALU class.
148373471bf0Spatrickdef Zn3WriteZeroIdiom : SchedWriteVariant<[
148473471bf0Spatrick    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
148573471bf0Spatrick    SchedVar<NoSchedPred,                          [WriteALU]>
148673471bf0Spatrick]>;
148773471bf0Spatrickdef : InstRW<[Zn3WriteZeroIdiom], (instrs XOR32rr, XOR32rr_REV,
148873471bf0Spatrick                                          XOR64rr, XOR64rr_REV,
148973471bf0Spatrick                                          SUB32rr, SUB32rr_REV,
149073471bf0Spatrick                                          SUB64rr, SUB64rr_REV)>;
149173471bf0Spatrick
// Variant write for register-register CMP of every width: when operands 0 and
// 1 are the same register (CheckSameRegOperand<0, 1>) it resolves to
// Zn3WriteZeroLatency, otherwise it falls back to WriteALU.
149273471bf0Spatrickdef Zn3WriteZeroIdiomEFLAGS : SchedWriteVariant<[
149373471bf0Spatrick    SchedVar<MCSchedPredicate<CheckSameRegOperand<0, 1>>, [Zn3WriteZeroLatency]>,
149473471bf0Spatrick    SchedVar<NoSchedPred,                                 [WriteALU]>
149573471bf0Spatrick]>;
149673471bf0Spatrickdef : InstRW<[Zn3WriteZeroIdiomEFLAGS], (instrs CMP8rr,  CMP8rr_REV,
149773471bf0Spatrick                                                CMP16rr, CMP16rr_REV,
149873471bf0Spatrick                                                CMP32rr, CMP32rr_REV,
149973471bf0Spatrick                                                CMP64rr, CMP64rr_REV)>;
150073471bf0Spatrick
// Variant write for the AVX XMM floating-point logic ops listed below: when
// ZeroIdiomPredicate matches it resolves to Zn3WriteZeroLatency, otherwise it
// falls back to WriteFLogic.
150173471bf0Spatrickdef Zn3WriteFZeroIdiom : SchedWriteVariant<[
150273471bf0Spatrick    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
150373471bf0Spatrick    SchedVar<NoSchedPred,                          [WriteFLogic]>
150473471bf0Spatrick]>;
150573471bf0Spatrick// NOTE: XORPSrr, XORPDrr are not zero-cycle!
150673471bf0Spatrickdef : InstRW<[Zn3WriteFZeroIdiom], (instrs VXORPSrr, VXORPDrr,
150773471bf0Spatrick                                           VANDNPSrr, VANDNPDrr)>;
150873471bf0Spatrick
// YMM counterpart of the floating-point logic variant: Zn3WriteZeroLatency
// when ZeroIdiomPredicate matches, WriteFLogicY otherwise.
150973471bf0Spatrickdef Zn3WriteFZeroIdiomY : SchedWriteVariant<[
151073471bf0Spatrick    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
151173471bf0Spatrick    SchedVar<NoSchedPred,                          [WriteFLogicY]>
151273471bf0Spatrick]>;
151373471bf0Spatrickdef : InstRW<[Zn3WriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
151473471bf0Spatrick                                            VANDNPSYrr, VANDNPDYrr)>;
151573471bf0Spatrick
// Variant write for the AVX XMM integer logic ops VPXORrr and VPANDNrr:
// Zn3WriteZeroLatency when ZeroIdiomPredicate matches, WriteVecLogicX
// otherwise.
151673471bf0Spatrickdef Zn3WriteVZeroIdiomLogicX : SchedWriteVariant<[
151773471bf0Spatrick    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
151873471bf0Spatrick    SchedVar<NoSchedPred,                          [WriteVecLogicX]>
151973471bf0Spatrick]>;
152073471bf0Spatrick// NOTE: PXORrr,PANDNrr are not zero-cycle!
152173471bf0Spatrickdef : InstRW<[Zn3WriteVZeroIdiomLogicX], (instrs VPXORrr, VPANDNrr)>;
152273471bf0Spatrick
// YMM counterpart of the integer logic variant, applied to VPXORYrr and
// VPANDNYrr: Zn3WriteZeroLatency when ZeroIdiomPredicate matches,
// WriteVecLogicY otherwise.
152373471bf0Spatrickdef Zn3WriteVZeroIdiomLogicY : SchedWriteVariant<[
152473471bf0Spatrick    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
152573471bf0Spatrick    SchedVar<NoSchedPred,                          [WriteVecLogicY]>
152673471bf0Spatrick]>;
152773471bf0Spatrickdef : InstRW<[Zn3WriteVZeroIdiomLogicY], (instrs VPXORYrr, VPANDNYrr)>;
152873471bf0Spatrick
// Variant write for the AVX XMM integer subtract and compare-greater-than
// ops listed below: Zn3WriteZeroLatency when ZeroIdiomPredicate matches,
// WriteVecALUX otherwise.
152973471bf0Spatrickdef Zn3WriteVZeroIdiomALUX : SchedWriteVariant<[
153073471bf0Spatrick    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
153173471bf0Spatrick    SchedVar<NoSchedPred,                          [WriteVecALUX]>
153273471bf0Spatrick]>;
153373471bf0Spatrick// NOTE: PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
153473471bf0Spatrick//       PCMPGTBrr, PCMPGTWrr, PCMPGTDrr, PCMPGTQrr are not zero-cycle!
153573471bf0Spatrickdef : InstRW<[Zn3WriteVZeroIdiomALUX],
153673471bf0Spatrick             (instrs VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
153773471bf0Spatrick                     VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr)>;
153873471bf0Spatrick
// YMM counterpart of the integer subtract / compare-greater-than variant:
// Zn3WriteZeroLatency when ZeroIdiomPredicate matches, WriteVecALUY otherwise.
153973471bf0Spatrickdef Zn3WriteVZeroIdiomALUY : SchedWriteVariant<[
154073471bf0Spatrick    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
154173471bf0Spatrick    SchedVar<NoSchedPred,                          [WriteVecALUY]>
154273471bf0Spatrick]>;
154373471bf0Spatrickdef : InstRW<[Zn3WriteVZeroIdiomALUY],
154473471bf0Spatrick             (instrs VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
154573471bf0Spatrick                     VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr)>;
154673471bf0Spatrick
// Declares which opcodes are recognized as zero idioms. Every class here is
// guarded by ZeroIdiomPredicate and is grouped by register file / encoding:
// GPR, SSE XMM, AVX XMM and AVX YMM.
// NOTE(review): the saturating-subtract opcodes (PSUBS*/PSUBUS* and their
// VEX forms) appear here but in none of the zero-latency InstRW lists in this
// part of the file -- confirm that this asymmetry is intended.
154773471bf0Spatrickdef : IsZeroIdiomFunction<[
154873471bf0Spatrick  // GPR Zero-idioms.
154973471bf0Spatrick  DepBreakingClass<[ XOR32rr, XOR32rr_REV,
155073471bf0Spatrick                     XOR64rr, XOR64rr_REV,
155173471bf0Spatrick                     SUB32rr, SUB32rr_REV,
155273471bf0Spatrick                     SUB64rr, SUB64rr_REV ], ZeroIdiomPredicate>,
155373471bf0Spatrick
155473471bf0Spatrick  // SSE XMM Zero-idioms.
155573471bf0Spatrick  DepBreakingClass<[
155673471bf0Spatrick    // fp variants.
155773471bf0Spatrick    XORPSrr, XORPDrr,
155873471bf0Spatrick    ANDNPSrr, ANDNPDrr,
155973471bf0Spatrick
156073471bf0Spatrick    // int variants.
156173471bf0Spatrick    PXORrr,
156273471bf0Spatrick    PANDNrr,
156373471bf0Spatrick    PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
156473471bf0Spatrick    PSUBSBrr, PSUBSWrr,
156573471bf0Spatrick    PSUBUSBrr, PSUBUSWrr,
156673471bf0Spatrick    PCMPGTBrr, PCMPGTWrr, PCMPGTDrr, PCMPGTQrr
156773471bf0Spatrick  ], ZeroIdiomPredicate>,
156873471bf0Spatrick
156973471bf0Spatrick  // AVX XMM Zero-idioms.
157073471bf0Spatrick  DepBreakingClass<[
157173471bf0Spatrick    // fp variants.
157273471bf0Spatrick    VXORPSrr, VXORPDrr,
157373471bf0Spatrick    VANDNPSrr, VANDNPDrr,
157473471bf0Spatrick
157573471bf0Spatrick    // int variants.
157673471bf0Spatrick    VPXORrr,
157773471bf0Spatrick    VPANDNrr,
157873471bf0Spatrick    VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
157973471bf0Spatrick    VPSUBSBrr, VPSUBSWrr,
158073471bf0Spatrick    VPSUBUSBrr, VPSUBUSWrr,
158173471bf0Spatrick    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
158273471bf0Spatrick  ], ZeroIdiomPredicate>,
158373471bf0Spatrick
158473471bf0Spatrick  // AVX YMM Zero-idioms.
158573471bf0Spatrick  DepBreakingClass<[
158673471bf0Spatrick    // fp variants.
158773471bf0Spatrick    VXORPSYrr, VXORPDYrr,
158873471bf0Spatrick    VANDNPSYrr, VANDNPDYrr,
158973471bf0Spatrick
159073471bf0Spatrick    // int variants.
159173471bf0Spatrick    VPXORYrr,
159273471bf0Spatrick    VPANDNYrr,
159373471bf0Spatrick    VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
159473471bf0Spatrick    VPSUBSBYrr, VPSUBSWYrr,
159573471bf0Spatrick    VPSUBUSBYrr, VPSUBUSWYrr,
159673471bf0Spatrick    VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr
159773471bf0Spatrick  ], ZeroIdiomPredicate>,
159873471bf0Spatrick]>;
159973471bf0Spatrick
// Declares opcodes that are dependency-breaking; none of them is also listed
// in the IsZeroIdiomFunction definition in this file. The SBB and
// PCMPEQ-family classes are guarded by ZeroIdiomPredicate; the CMP class is
// guarded by CheckSameRegOperand<0, 1>, i.e. both compared operands must be
// the same register.
160073471bf0Spatrickdef : IsDepBreakingFunction<[
160173471bf0Spatrick  // GPR
160273471bf0Spatrick  DepBreakingClass<[ SBB32rr, SBB32rr_REV,
160373471bf0Spatrick                     SBB64rr, SBB64rr_REV ], ZeroIdiomPredicate>,
160473471bf0Spatrick  DepBreakingClass<[ CMP8rr,  CMP8rr_REV,
160573471bf0Spatrick                     CMP16rr, CMP16rr_REV,
160673471bf0Spatrick                     CMP32rr, CMP32rr_REV,
160773471bf0Spatrick                     CMP64rr, CMP64rr_REV ], CheckSameRegOperand<0, 1> >,
160873471bf0Spatrick
160973471bf0Spatrick  // MMX
161073471bf0Spatrick  DepBreakingClass<[
1611*d415bd75Srobert    MMX_PCMPEQBrr, MMX_PCMPEQWrr, MMX_PCMPEQDrr
161273471bf0Spatrick  ], ZeroIdiomPredicate>,
161373471bf0Spatrick
161473471bf0Spatrick  // SSE
161573471bf0Spatrick  DepBreakingClass<[
161673471bf0Spatrick    PCMPEQBrr, PCMPEQWrr, PCMPEQDrr, PCMPEQQrr
161773471bf0Spatrick  ], ZeroIdiomPredicate>,
161873471bf0Spatrick
161973471bf0Spatrick  // AVX XMM
162073471bf0Spatrick  DepBreakingClass<[
162173471bf0Spatrick    VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr, VPCMPEQQrr
162273471bf0Spatrick  ], ZeroIdiomPredicate>,
162373471bf0Spatrick
162473471bf0Spatrick  // AVX YMM
162573471bf0Spatrick  DepBreakingClass<[
162673471bf0Spatrick    VPCMPEQBYrr, VPCMPEQWYrr, VPCMPEQDYrr, VPCMPEQQYrr
162773471bf0Spatrick  ], ZeroIdiomPredicate>,
162873471bf0Spatrick]>;
162973471bf0Spatrick
163073471bf0Spatrick} // SchedModel
1631