//=- X86ScheduleZnver3.td - X86 Znver3 Scheduling ------------*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Znver3 to support instruction
// scheduling and other instruction cost heuristics.
// Based on:
//  * AMD Software Optimization Guide for AMD Family 19h Processors.
//    https://www.amd.com/system/files/TechDocs/56665.zip
//  * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog
//    http://www.agner.org/optimize/microarchitecture.pdf
//  * AMD Zen 3 Ryzen Deep Dive Review
//    https://www.anandtech.com/show/16214/
//===----------------------------------------------------------------------===//

def Znver3Model : SchedMachineModel {
  // AMD SOG 19h, 2.9.6 Dispatch
  // The processor may dispatch up to 6 macro ops per cycle
  // into the execution engine.
  let IssueWidth = 6;
  // AMD SOG 19h, 2.10.3
  // The retire control unit (RCU) tracks the completion status of all
  // outstanding operations (integer, load/store, and floating-point) and is
  // the final arbiter for exception processing and recovery.
  // The unit can receive up to 6 macro ops dispatched per cycle and track up
  // to 256 macro ops in-flight in non-SMT mode or 128 per thread in SMT mode.
  let MicroOpBufferSize = 256;
  // AMD SOG 19h, 2.9.1 Op Cache
  // The op cache is organized as an associative cache with 64 sets and 8 ways.
  // At each set-way intersection is an entry containing up to 8 macro ops.
  // The maximum capacity of the op cache is 4K ops.
  // Agner, 22.5 µop cache
  // The size of the µop cache is big enough for holding most critical loops.
  // FIXME: PR50584: MachineScheduler/PostRAScheduler have quadratic complexity,
  //        with large values here the compilation of certain loops
  //        ends up taking way too long.
  // let LoopMicroOpBufferSize = 4096;
  let LoopMicroOpBufferSize = 512;
  // AMD SOG 19h, 2.6.2 L1 Data Cache
  // The L1 data cache has a 4- or 5- cycle integer load-to-use latency.
  // AMD SOG 19h, 2.12 L1 Data Cache
  // The AGU and LS pipelines are optimized for simple address generation modes.
  // <...> and can achieve 4-cycle load-to-use integer load latency.
  let LoadLatency = 4;
  // AMD SOG 19h, 2.12 L1 Data Cache
  // The AGU and LS pipelines are optimized for simple address generation modes.
  // <...> and can achieve <...> 7-cycle load-to-use FP load latency.
  // Declared as a new field of this record (not a `let` override); other
  // records in this file read it as Znver3Model.VecLoadLatency.
  int VecLoadLatency = 7;
  // Latency of a simple store operation.
  // Also a new field of this record, read as Znver3Model.StoreLatency.
  int StoreLatency = 1;
  // FIXME
  let HighLatency = 25; // FIXME: any better choice?
  // AMD SOG 19h, 2.8 Optimizing Branching
  // The branch misprediction penalty is in the range from 11 to 18 cycles,
  // <...>. The common case penalty is 13 cycles.
  let MispredictPenalty = 13;

  let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.

  let CompleteModel = 1;
}

let SchedModel = Znver3Model in {


//===----------------------------------------------------------------------===//
// RCU
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.10.3 Retire Control Unit
// The unit can receive up to 6 macro ops dispatched per cycle and track up to
// 256 macro ops in-flight in non-SMT mode or 128 per thread in SMT mode. <...>
// The retire unit handles in-order commit of up to eight macro ops per cycle.
def Zn3RCU : RetireControlUnit<Znver3Model.MicroOpBufferSize, 8>;

//===----------------------------------------------------------------------===//
// Units
//===----------------------------------------------------------------------===//

// There are a total of three Units, each one with its own schedulers.
//===----------------------------------------------------------------------===//
// Integer Execution Unit
//

// AMD SOG 19h, 2.4 Superscalar Organization
// The processor uses four decoupled independent integer scheduler queues,
// each one servicing one ALU pipeline and one or two other pipelines

//
// Execution pipes
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.10.2 Execution Units
// The processor contains 4 general purpose integer execution pipes.
// Each pipe has an ALU capable of general purpose integer operations.
def Zn3ALU0 : ProcResource<1>;
def Zn3ALU1 : ProcResource<1>;
def Zn3ALU2 : ProcResource<1>;
def Zn3ALU3 : ProcResource<1>;

// AMD SOG 19h, 2.10.2 Execution Units
// There is also a separate branch execution unit.
def Zn3BRU1 : ProcResource<1>;

// AMD SOG 19h, 2.10.2 Execution Units
// There are three Address Generation Units (AGUs) for all load and store
// address generation. There are also 3 store data movement units
// associated with the same schedulers as the AGUs.
def Zn3AGU0 : ProcResource<1>;
def Zn3AGU1 : ProcResource<1>;
def Zn3AGU2 : ProcResource<1>;

//
// Execution Units
//===----------------------------------------------------------------------===//

// The names below are `defvar` aliases for existing pipes; they do not
// define additional processor resources.

// AMD SOG 19h, 2.10.2 Execution Units
// ALU0 additionally has divide <...> execution capability.
defvar Zn3Divider = Zn3ALU0;

// AMD SOG 19h, 2.10.2 Execution Units
// ALU0 additionally has <...> branch execution capability.
defvar Zn3BRU0 = Zn3ALU0;

// Integer Multiplication issued on ALU1.
defvar Zn3Multiplier = Zn3ALU1;

// Execution pipeline grouping
//===----------------------------------------------------------------------===//

// General ALU operations
def Zn3ALU0123 : ProcResGroup<[Zn3ALU0, Zn3ALU1, Zn3ALU2, Zn3ALU3]>;

// General AGU operations
def Zn3AGU012 : ProcResGroup<[Zn3AGU0, Zn3AGU1, Zn3AGU2]>;

// Control flow: jumps, calls
// (Zn3BRU0 is an alias of Zn3ALU0, so this group is {Zn3ALU0, Zn3BRU1}.)
def Zn3BRU01 : ProcResGroup<[Zn3BRU0, Zn3BRU1]>;

// Everything that isn't control flow, but still needs to access CC register,
// namely: conditional moves, SETcc.
def Zn3ALU03 : ProcResGroup<[Zn3ALU0, Zn3ALU3]>;

// Zn3ALU1 handles complex bit twiddling: CRC/PDEP/PEXT

// Simple bit twiddling: bit test, shift/rotate, bit extraction
def Zn3ALU12 : ProcResGroup<[Zn3ALU1, Zn3ALU2]>;


//
// Scheduling
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.10.3 Retire Control Unit
// The integer physical register file (PRF) consists of 192 registers.
def Zn3IntegerPRF : RegisterFile<192, [GR64, CCR], [1, 1], [1, 0],
                              6,  // Max moves that can be eliminated per cycle.
                              0>; // Restrict move elimination to zero regs.

// anandtech, The integer scheduler has a 4*24 entry macro op capacity.
// AMD SOG 19h, 2.10.1 Schedulers
// The schedulers can receive up to six macro ops per cycle, with a limit of
// two per scheduler. Each scheduler can issue one micro op per cycle into
// each of its associated pipelines
// FIXME: these are 4 separate schedulers, not a single big one.
def Zn3Int : ProcResGroup<[Zn3ALU0, Zn3AGU0, Zn3BRU0, // scheduler 0
                           Zn3ALU1, Zn3AGU1,          // scheduler 1
                           Zn3ALU2, Zn3AGU2,          // scheduler 2
                           Zn3ALU3,          Zn3BRU1  // scheduler 3
                          ]> {
  // One shared buffer sized as the sum of the four 24-entry queues.
  let BufferSize = !mul(4, 24);
}


//===----------------------------------------------------------------------===//
// Floating-Point Unit
//

// AMD SOG 19h, 2.4 Superscalar Organization
// The processor uses <...> two decoupled independent floating point schedulers
// each servicing two FP pipelines and one store or FP-to-integer pipeline.

//
// Execution pipes
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.10.1 Schedulers
// <...>, and six FPU pipes.
// Agner, 22.10 Floating point execution pipes
// There are six floating point/vector execution pipes,
// Pipes 4 and 5 are modeled together as one resource with two units.
def Zn3FPP0  : ProcResource<1>;
def Zn3FPP1  : ProcResource<1>;
def Zn3FPP2  : ProcResource<1>;
def Zn3FPP3  : ProcResource<1>;
def Zn3FPP45 : ProcResource<2>;

//
// Execution Units
//===----------------------------------------------------------------------===//
// AMD SOG 19h, 2.11.1 Floating Point Execution Resources

// (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ)
defvar Zn3FPFMul0 = Zn3FPP0;
defvar Zn3FPFMul1 = Zn3FPP1;

// (v)FADD*
defvar Zn3FPFAdd0 = Zn3FPP2;
defvar Zn3FPFAdd1 = Zn3FPP3;

// All convert operations except pack/unpack
defvar Zn3FPFCvt0 = Zn3FPP2;
defvar Zn3FPFCvt1 = Zn3FPP3;

// All Divide and Square Root except Reciprocal Approximation
// AMD SOG 19h, 2.11.1 Floating Point Execution Resources
// FDIV unit can support 2 simultaneous operations in flight
// even though it occupies a single pipe.
// FIXME: BufferSize=2 ?
defvar Zn3FPFDiv = Zn3FPP1;

// Moves and Logical operations on Floating Point Data Types
defvar Zn3FPFMisc0 = Zn3FPP0;
defvar Zn3FPFMisc1 = Zn3FPP1;
defvar Zn3FPFMisc2 = Zn3FPP2;
defvar Zn3FPFMisc3 = Zn3FPP3;

// Integer Adds, Subtracts, and Compares
// Some complex VADD operations are not available in all pipes.
defvar Zn3FPVAdd0 = Zn3FPP0;
defvar Zn3FPVAdd1 = Zn3FPP1;
defvar Zn3FPVAdd2 = Zn3FPP2;
defvar Zn3FPVAdd3 = Zn3FPP3;

// Integer Multiplies, SAD, Blendvb
defvar Zn3FPVMul0 = Zn3FPP0;
defvar Zn3FPVMul1 = Zn3FPP3;

// Data Shuffles, Packs, Unpacks, Permute
// Some complex shuffle operations are only available in pipe1.
defvar Zn3FPVShuf = Zn3FPP1;
defvar Zn3FPVShufAux = Zn3FPP2;

// Bit Shift Left/Right operations
defvar Zn3FPVShift0 = Zn3FPP1;
defvar Zn3FPVShift1 = Zn3FPP2;

// Moves and Logical operations on Packed Integer Data Types
defvar Zn3FPVMisc0 = Zn3FPP0;
defvar Zn3FPVMisc1 = Zn3FPP1;
defvar Zn3FPVMisc2 = Zn3FPP2;
defvar Zn3FPVMisc3 = Zn3FPP3;

// *AES*
defvar Zn3FPAES0 = Zn3FPP0;
defvar Zn3FPAES1 = Zn3FPP1;

// *CLM*
defvar Zn3FPCLM0 = Zn3FPP0;
defvar Zn3FPCLM1 = Zn3FPP1;

// Execution pipeline grouping
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.11 Floating-Point Unit
// Stores and floating point to general purpose register transfer
// have 2 dedicated pipelines (pipe 5 and 6).
// This group therefore lists only the four remaining pipes (Zn3FPP0..3).
def Zn3FPU0123 : ProcResGroup<[Zn3FPP0, Zn3FPP1, Zn3FPP2, Zn3FPP3]>;

// (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ)
def Zn3FPFMul01 : ProcResGroup<[Zn3FPFMul0, Zn3FPFMul1]>;

// (v)FADD*
// Some complex VADD operations are not available in all pipes.
def Zn3FPFAdd01 : ProcResGroup<[Zn3FPFAdd0, Zn3FPFAdd1]>;

// All convert operations except pack/unpack
def Zn3FPFCvt01 : ProcResGroup<[Zn3FPFCvt0, Zn3FPFCvt1]>;

// All Divide and Square Root except Reciprocal Approximation
// def Zn3FPFDiv : ProcResGroup<[Zn3FPFDiv]>;

// Moves and Logical operations on Floating Point Data Types
def Zn3FPFMisc0123 : ProcResGroup<[Zn3FPFMisc0, Zn3FPFMisc1, Zn3FPFMisc2, Zn3FPFMisc3]>;

def Zn3FPFMisc12 : ProcResGroup<[Zn3FPFMisc1, Zn3FPFMisc2]>;

// Loads, Stores and Move to General Register (EX) Operations
// AMD SOG 19h, 2.11 Floating-Point Unit
// Stores and floating point to general purpose register transfer
// have 2 dedicated pipelines (pipe 5 and 6).
defvar Zn3FPLd01 = Zn3FPP45;

// AMD SOG 19h, 2.11 Floating-Point Unit
// Note that FP stores are supported on two pipelines,
// but throughput is limited to one per cycle.
// Modeled as a single-unit resource whose Super is the two-unit Zn3FPP45.
let Super = Zn3FPP45 in
def Zn3FPSt : ProcResource<1>;

// Integer Adds, Subtracts, and Compares
// Some complex VADD operations are not available in all pipes.
def Zn3FPVAdd0123 : ProcResGroup<[Zn3FPVAdd0, Zn3FPVAdd1, Zn3FPVAdd2, Zn3FPVAdd3]>;

def Zn3FPVAdd01: ProcResGroup<[Zn3FPVAdd0, Zn3FPVAdd1]>;
def Zn3FPVAdd12: ProcResGroup<[Zn3FPVAdd1, Zn3FPVAdd2]>;

// Integer Multiplies, SAD, Blendvb
def Zn3FPVMul01 : ProcResGroup<[Zn3FPVMul0, Zn3FPVMul1]>;

// Data Shuffles, Packs, Unpacks, Permute
// Some complex shuffle operations are only available in pipe1.
def Zn3FPVShuf01 : ProcResGroup<[Zn3FPVShuf, Zn3FPVShufAux]>;

// Bit Shift Left/Right operations
def Zn3FPVShift01 : ProcResGroup<[Zn3FPVShift0, Zn3FPVShift1]>;

// Moves and Logical operations on Packed Integer Data Types
def Zn3FPVMisc0123 : ProcResGroup<[Zn3FPVMisc0, Zn3FPVMisc1, Zn3FPVMisc2, Zn3FPVMisc3]>;

// *AES*
def Zn3FPAES01 : ProcResGroup<[Zn3FPAES0, Zn3FPAES1]>;

// *CLM*
def Zn3FPCLM01 : ProcResGroup<[Zn3FPCLM0, Zn3FPCLM1]>;


//
// Scheduling
//===----------------------------------------------------------------------===//

// Agner, 21.8 Register renaming and out-of-order schedulers
// The floating point register file has 160 vector registers
// of 128 bits each in Zen 1 and 256 bits each in Zen 2.
// anandtech also confirms this.
def Zn3FpPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 1], [0, 1, 1],
                            6,  // Max moves that can be eliminated per cycle.
                            0>; // Restrict move elimination to zero regs.

// AMD SOG 19h, 2.11 Floating-Point Unit
// The floating-point scheduler has a 2*32 entry macro op capacity.
// AMD SOG 19h, 2.11 Floating-Point Unit
// <...> the scheduler can issue 1 micro op per cycle for each pipe.
// FIXME: those are two separate schedulers, not a single big one.
def Zn3FP : ProcResGroup<[Zn3FPP0, Zn3FPP2,          /*Zn3FPP4,*/ // scheduler 0
                          Zn3FPP1, Zn3FPP3, Zn3FPP45 /*Zn3FPP5*/  // scheduler 1
                         ]> {
  // One shared buffer sized as the sum of the two 32-entry schedulers.
  let BufferSize = !mul(2, 32);
}

// AMD SOG 19h, 2.11 Floating-Point Unit
// Macro ops can be dispatched to the 64 entry Non Scheduling Queue (NSQ)
// even if floating-point scheduler is full.
// FIXME: how to model this properly?


//===----------------------------------------------------------------------===//
// Load-Store Unit
//

// AMD SOG 19h, 2.12 Load-Store Unit
// The LS unit contains three largely independent pipe-lines
// enabling the execution of three 256-bit memory operations per cycle.
def Zn3LSU : ProcResource<3>;

// AMD SOG 19h, 2.12 Load-Store Unit
// All three memory operations can be loads.
let Super = Zn3LSU in
def Zn3Load : ProcResource<3> {
  // AMD SOG 19h, 2.12 Load-Store Unit
  // The LS unit can process up to 72 out-of-order loads.
  let BufferSize = 72;
}

def Zn3LoadQueue : LoadQueue<Zn3Load>;

// AMD SOG 19h, 2.12 Load-Store Unit
// A maximum of two of the memory operations can be stores.
let Super = Zn3LSU in
def Zn3Store : ProcResource<2> {
  // AMD SOG 19h, 2.12 Load-Store Unit
  // The LS unit utilizes a 64-entry store queue (STQ).
  let BufferSize = 64;
}

def Zn3StoreQueue : StoreQueue<Zn3Store>;

//===----------------------------------------------------------------------===//
// Basic helper classes.
//===----------------------------------------------------------------------===//

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.
// Emits one WriteRes record binding SchedRW to ExePorts with the given
// latency (Lat), per-port resource cycles (Res) and micro-op count (UOps).
multiclass __zn3WriteRes<SchedWrite SchedRW, list<ProcResourceKind> ExePorts,
                         int Lat = 1, list<int> Res = [], int UOps = 1> {
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ResourceCycles = Res;
    let NumMicroOps = UOps;
  }
}

// Emits two WriteRes records: one for SchedRW itself and one for
// SchedRW.Folded (the variant with a folded load). The folded record:
//  * prepends AGU and Zn3Load to ExePorts;
//  * uses Lat + LoadLat as its latency and UOps + LoadUOps as its
//    micro-op count;
//  * leaves the resource-cycle list empty when Res is empty and LoadRes is 1;
//    otherwise it is [1, LoadRes] followed by Res, or, if Res is empty,
//    followed by a 1 for every entry of ExePorts.
multiclass __zn3WriteResPair<X86FoldableSchedWrite SchedRW,
                             list<ProcResourceKind> ExePorts, int Lat,
                             list<int> Res, int UOps, int LoadLat, int LoadUOps,
                             ProcResourceKind AGU, int LoadRes> {
  defm : __zn3WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;

  defm : __zn3WriteRes<SchedRW.Folded,
                       !listconcat([AGU, Zn3Load], ExePorts),
                       !add(Lat, LoadLat),
                       !if(!and(!empty(Res), !eq(LoadRes, 1)),
                         [],
                         !listconcat([1, LoadRes],
                           !if(!empty(Res),
                             !listsplat(1, !size(ExePorts)),
                             Res))),
                       !add(UOps, LoadUOps)>;
}

// For classes without folded loads.
// Zn3WriteResInt, Zn3WriteResXMM and Zn3WriteResYMM all forward their
// arguments unchanged to __zn3WriteRes.
multiclass Zn3WriteResInt<SchedWrite SchedRW,
                          list<ProcResourceKind> ExePorts, int Lat = 1,
                          list<int> Res = [], int UOps = 1> {
  defm : __zn3WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
}

multiclass Zn3WriteResXMM<SchedWrite SchedRW,
                          list<ProcResourceKind> ExePorts, int Lat = 1,
                          list<int> Res = [], int UOps = 1> {
  defm : __zn3WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
}

multiclass Zn3WriteResYMM<SchedWrite SchedRW,
                          list<ProcResourceKind> ExePorts, int Lat = 1,
                          list<int> Res = [], int UOps = 1> {
  defm : __zn3WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
}

// For classes with folded loads.
// The integer pair uses Znver3Model.LoadLatency and the Zn3AGU012 group for
// the folded load; the XMM and YMM pairs use Znver3Model.VecLoadLatency and
// Zn3FPLd01.
multiclass Zn3WriteResIntPair<X86FoldableSchedWrite SchedRW,
                              list<ProcResourceKind> ExePorts, int Lat = 1,
                              list<int> Res = [], int UOps = 1,
                              int LoadUOps = 0, int LoadRes = 1> {
  defm : __zn3WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                           Znver3Model.LoadLatency,
                           LoadUOps, Zn3AGU012, LoadRes>;
}

multiclass Zn3WriteResXMMPair<X86FoldableSchedWrite SchedRW,
                              list<ProcResourceKind> ExePorts, int Lat = 1,
                              list<int> Res = [], int UOps = 1,
                              int LoadUOps = 0, int LoadRes = 1> {
  defm : __zn3WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                           Znver3Model.VecLoadLatency,
                           LoadUOps, Zn3FPLd01, LoadRes>;
}

multiclass Zn3WriteResYMMPair<X86FoldableSchedWrite SchedRW,
                              list<ProcResourceKind> ExePorts, int Lat = 1,
                              list<int> Res = [], int UOps = 1,
                              int LoadUOps = 0, int LoadRes = 1> {
  defm : __zn3WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                           Znver3Model.VecLoadLatency,
                           LoadUOps, Zn3FPLd01, LoadRes>;
}


//===----------------------------------------------------------------------===//
// Here be dragons.
//===----------------------------------------------------------------------===//

def : ReadAdvance<ReadAfterLd, Znver3Model.LoadLatency>;

def : ReadAdvance<ReadAfterVecLd, Znver3Model.VecLoadLatency>;
def : ReadAdvance<ReadAfterVecXLd, Znver3Model.VecLoadLatency>;
def : ReadAdvance<ReadAfterVecYLd, Znver3Model.VecLoadLatency>;

// AMD SOG 19h, 2.11 Floating-Point Unit
// There is 1 cycle of added latency for a result to cross
// from F to I or I to F domain.
def : ReadAdvance<ReadInt2Fpu, -1>;

// Instructions with both a load and a store folded are modeled as a folded
// load + WriteRMW.
defm : Zn3WriteResInt<WriteRMW, [Zn3AGU012, Zn3Store], Znver3Model.StoreLatency, [1, 1], 0>;

// Loads, stores, and moves, not folded with other operations.
defm : Zn3WriteResInt<WriteLoad, [Zn3AGU012, Zn3Load], !add(Znver3Model.LoadLatency, 1), [1, 1], 1>;

// Model the effect of clobbering the read-write mask operand of the GATHER operation.
// Does not cost anything by itself, only has latency, matching that of the WriteLoad.
defm : Zn3WriteResInt<WriteVecMaskedGatherWriteback, [], !add(Znver3Model.LoadLatency, 1), [], 0>;

def Zn3WriteMOVSlow : SchedWriteRes<[Zn3AGU012, Zn3Load]> {
  let Latency = !add(Znver3Model.LoadLatency, 1);
  let ResourceCycles = [3, 1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteMOVSlow], (instrs MOV8rm, MOV8rm_NOREX, MOV16rm, MOVSX16rm16, MOVSX16rm32, MOVZX16rm16, MOVSX16rm8, MOVZX16rm8)>;

defm : Zn3WriteResInt<WriteStore, [Zn3AGU012, Zn3Store], Znver3Model.StoreLatency, [1, 2], 1>;
defm : Zn3WriteResInt<WriteStoreNT, [Zn3AGU012, Zn3Store], Znver3Model.StoreLatency, [1, 2], 1>;
defm : Zn3WriteResInt<WriteMove, [Zn3ALU0123], 1, [4], 1>;

// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;

def Zn3WriteMOVBE16rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> {
  let Latency = Znver3Model.LoadLatency;
  let ResourceCycles = [1, 1, 4];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteMOVBE16rm], (instrs MOVBE16rm)>;

def Zn3WriteMOVBEmr : SchedWriteRes<[Zn3ALU0123, Zn3AGU012, Zn3Store]> {
  let Latency = Znver3Model.StoreLatency;
  let ResourceCycles = [4, 1, 1];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteMOVBEmr], (instrs MOVBE16mr, MOVBE32mr, MOVBE64mr)>;

// Arithmetic.
defm : Zn3WriteResIntPair<WriteALU, [Zn3ALU0123], 1, [1], 1>; // Simple integer ALU op.

// Accumulator-with-immediate forms of ADD/AND/OR/SUB/XOR: same latency as
// WriteALU, but occupying the Zn3ALU0123 group for 4 cycles.
def Zn3WriteALUSlow : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 1;
  let ResourceCycles = [4];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteALUSlow], (instrs ADD8i8, ADD16i16, ADD32i32, ADD64i32,
                                        AND8i8, AND16i16, AND32i32, AND64i32,
                                         OR8i8,  OR16i16,  OR32i32,  OR64i32,
                                        SUB8i8, SUB16i16, SUB32i32, SUB64i32,
                                        XOR8i8, XOR16i16, XOR32i32, XOR64i32)>;

def Zn3WriteMoveExtend : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 1;
  let ResourceCycles = [4];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteMoveExtend], (instrs MOVSX16rr16, MOVSX16rr32, MOVZX16rr16, MOVSX16rr8, MOVZX16rr8)>;

def Zn3WriteMaterialize32bitImm: SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 1;
  let ResourceCycles = [2];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteMaterialize32bitImm], (instrs MOV32ri, MOV32ri_alt, MOV64ri32)>;

// PDEP/PEXT are bound to Zn3ALU1 only.
def Zn3WritePDEP_PEXT : SchedWriteRes<[Zn3ALU1]> {
  let Latency = 3;
  let ResourceCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WritePDEP_PEXT], (instrs PDEP32rr, PDEP64rr,
                                          PEXT32rr, PEXT64rr)>;

defm : Zn3WriteResIntPair<WriteADC, [Zn3ALU0123], 1, [4], 1>; // Integer ALU + flags op.
def Zn3WriteADC8mr_SBB8mr : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123, Zn3Store]> {
  let Latency = 1;
  let ResourceCycles = [1, 1, 7, 1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteADC8mr_SBB8mr], (instrs ADC8mr, SBB8mr)>;

// This is for simple LEAs with one or two input operands.
defm : Zn3WriteResInt<WriteLEA, [Zn3AGU012], 1, [1], 1>; // LEA instructions can't fold loads.

// This write is used for slow LEA instructions.
def Zn3Write3OpsLEA : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 2;
  let ResourceCycles = [1];
  let NumMicroOps = 2;
}

// On Znver3, a slow LEA is either a 3Ops LEA (base, index, offset),
// or an LEA with a `Scale` value different than 1.
def Zn3SlowLEAPredicate : MCSchedPredicate<
  CheckAny<[
    // A 3-operand LEA (base, index, offset).
    IsThreeOperandsLEAFn,
    // An LEA with a "Scale" different than 1.
    CheckAll<[
      CheckIsImmOperand<2>,
      CheckNot<CheckImmOperand<2, 1>>
    ]>
  ]>
>;

// Variants are listed slow-first; the NoSchedPred entry falls back to the
// plain WriteLEA.
def Zn3WriteLEA : SchedWriteVariant<[
    SchedVar<Zn3SlowLEAPredicate, [Zn3Write3OpsLEA]>,
    SchedVar<NoSchedPred,         [WriteLEA]>
]>;

def : InstRW<[Zn3WriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;

def Zn3SlowLEA16r : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 2; // FIXME: not from llvm-exegesis
  let ResourceCycles = [4];
  let NumMicroOps = 2;
}

def : InstRW<[Zn3SlowLEA16r], (instrs LEA16r)>;

// Integer multiplication
defm : Zn3WriteResIntPair<WriteIMul8, [Zn3Multiplier], 3, [3], 1>; // Integer 8-bit multiplication.
defm : Zn3WriteResIntPair<WriteIMul16, [Zn3Multiplier], 3, [3], 3, /*LoadUOps=*/1>; // Integer 16-bit multiplication.
defm : Zn3WriteResIntPair<WriteIMul16Imm, [Zn3Multiplier], 4, [4], 2>; // Integer 16-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul16Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 16-bit multiplication by register.
defm : Zn3WriteResIntPair<WriteIMul32, [Zn3Multiplier], 3, [3], 2>;    // Integer 32-bit multiplication.
defm : Zn3WriteResIntPair<WriteMULX32, [Zn3Multiplier], 3, [1], 2>;    // Integer 32-bit Unsigned Multiply Without Affecting Flags.
defm : Zn3WriteResIntPair<WriteIMul32Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul32Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by register.
defm : Zn3WriteResIntPair<WriteIMul64, [Zn3Multiplier], 3, [3], 2>; // Integer 64-bit multiplication.
defm : Zn3WriteResIntPair<WriteMULX64, [Zn3Multiplier], 3, [1], 2>; // Integer 64-bit Unsigned Multiply Without Affecting Flags.
defm : Zn3WriteResIntPair<WriteIMul64Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul64Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by register.
// The two "high part" writes carry no resources and zero uops; only a
// latency is modelled (4, plus the load latency for the load variant).
defm : Zn3WriteResInt<WriteIMulHLd, [], !add(4, Znver3Model.LoadLatency), [], 0>; // Integer multiplication, high part (load variant).
defm : Zn3WriteResInt<WriteIMulH, [], 4, [], 0>; // Integer multiplication, high part.

defm : Zn3WriteResInt<WriteBSWAP32, [Zn3ALU0123], 1, [1], 1>; // Byte Order (Endianness) 32-bit Swap.
defm : Zn3WriteResInt<WriteBSWAP64, [Zn3ALU0123], 1, [1], 1>; // Byte Order (Endianness) 64-bit Swap.

defm : Zn3WriteResIntPair<WriteCMPXCHG, [Zn3ALU0123], 3, [12], 5>; // Compare and set, compare and swap.

// CMPXCHG8rr override: same latency and ALU occupancy as the generic
// WriteCMPXCHG entry above, but 3 uops instead of 5.
def Zn3WriteCMPXCHG8rr : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 3;
  let NumMicroOps = 3;
  let ResourceCycles = [12];
}
def : InstRW<[Zn3WriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;

defm : Zn3WriteResInt<WriteCMPXCHGRMW, [Zn3ALU0123], 3, [12], 6>; // Compare and set, compare and swap.

// Memory / locked 8-bit CMPXCHG: derived from the register form by adding
// the load latency and two extra uops.
def Zn3WriteCMPXCHG8rm_LCMPXCHG8 : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteCMPXCHG8rr.Latency);
  let NumMicroOps = !add(Zn3WriteCMPXCHG8rr.NumMicroOps, 2);
  let ResourceCycles = [1, 1, 12];
}
def : InstRW<[Zn3WriteCMPXCHG8rm_LCMPXCHG8], (instrs CMPXCHG8rm, LCMPXCHG8)>;

// CMPXCHG8B: 19 uops, ALU group held for 24 cycles.
def Zn3WriteCMPXCHG8B : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 3; // FIXME: not from llvm-exegesis
  let NumMicroOps = 19;
  let ResourceCycles = [24];
}
def : InstRW<[Zn3WriteCMPXCHG8B], (instrs CMPXCHG8B)>;

// CMPXCHG16B and its locked form: 28 uops, ALU group held for 59 cycles.
def Zn3WriteCMPXCHG16B_LCMPXCHG16B : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 4; // FIXME: not from llvm-exegesis
  let NumMicroOps = 28;
  let ResourceCycles = [59];
}
def : InstRW<[Zn3WriteCMPXCHG16B_LCMPXCHG16B], (instrs CMPXCHG16B, LCMPXCHG16B)>;

// 8/16-bit register XCHG forms: 2 uops, ALU group held for 2 cycles.
def Zn3WriteWriteXCHGUnrenameable : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 1;
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}
def : InstRW<[Zn3WriteWriteXCHGUnrenameable], (instrs XCHG8rr, XCHG16rr, XCHG16ar)>;

// 8/16-bit memory XCHG: load latency + 3, 5 uops.
def Zn3WriteXCHG8rm_XCHG16rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, 3); // FIXME: not from llvm-exegesis
  let NumMicroOps = 5;
  let ResourceCycles = [1, 1, 2];
}
def : InstRW<[Zn3WriteXCHG8rm_XCHG16rm], (instrs XCHG8rm, XCHG16rm)>;

// 32/64-bit memory XCHG: load latency + 2, 2 uops.
def Zn3WriteXCHG32rm_XCHG64rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, 2); // FIXME: not from llvm-exegesis
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1, 2];
}
def : InstRW<[Zn3WriteXCHG32rm_XCHG64rm], (instrs XCHG32rm, XCHG64rm)>;

// Integer division.
// In every entry below the divider is held for as many cycles as the
// latency; signed and unsigned entries use identical numbers.
// FIXME: uops for 8-bit division measures as 2. for others it's a guess.
// FIXME: latency for 8-bit division measures as 10. for others it's a guess.
defm : Zn3WriteResIntPair<WriteDiv8, [Zn3Divider], 10, [10], 2>;
defm : Zn3WriteResIntPair<WriteDiv16, [Zn3Divider], 11, [11], 2>;
defm : Zn3WriteResIntPair<WriteDiv32, [Zn3Divider], 13, [13], 2>;
defm : Zn3WriteResIntPair<WriteDiv64, [Zn3Divider], 17, [17], 2>;
defm : Zn3WriteResIntPair<WriteIDiv8, [Zn3Divider], 10, [10], 2>;
defm : Zn3WriteResIntPair<WriteIDiv16, [Zn3Divider], 11, [11], 2>;
defm : Zn3WriteResIntPair<WriteIDiv32, [Zn3Divider], 13, [13], 2>;
defm : Zn3WriteResIntPair<WriteIDiv64, [Zn3Divider], 17, [17], 2>;

defm : Zn3WriteResIntPair<WriteBSF, [Zn3ALU1], 3, [3], 6, /*LoadUOps=*/2>; // Bit scan forward.
defm : Zn3WriteResIntPair<WriteBSR, [Zn3ALU1], 4, [4], 6, /*LoadUOps=*/2>; // Bit scan reverse.

defm : Zn3WriteResIntPair<WritePOPCNT, [Zn3ALU0123], 1, [1], 1>; // Bit population count.

// 16-bit register POPCNT override: ALU group held for 4 cycles.
def Zn3WritePOPCNT16rr : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ResourceCycles = [4];
}
def : InstRW<[Zn3WritePOPCNT16rr], (instrs POPCNT16rr)>;

defm : Zn3WriteResIntPair<WriteLZCNT, [Zn3ALU0123], 1, [1], 1>; // Leading zero count.

// 16-bit register LZCNT override: ALU group held for 4 cycles.
def Zn3WriteLZCNT16rr : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ResourceCycles = [4];
}
def : InstRW<[Zn3WriteLZCNT16rr], (instrs LZCNT16rr)>;

defm : Zn3WriteResIntPair<WriteTZCNT, [Zn3ALU12], 2, [1], 2>; // Trailing zero count.

// 16-bit register TZCNT override: modelled on the full ALU group (not
// Zn3ALU12 like the generic entry) and held for 4 cycles.
def Zn3WriteTZCNT16rr : SchedWriteRes<[Zn3ALU0123]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ResourceCycles = [4];
}
def : InstRW<[Zn3WriteTZCNT16rr], (instrs TZCNT16rr)>;

defm : Zn3WriteResIntPair<WriteCMOV, [Zn3ALU03], 1, [1], 1>; // Conditional move.
defm : Zn3WriteResInt<WriteFCMOV, [Zn3ALU0123], 7, [28], 7>; // FIXME: not from llvm-exegesis // X87 conditional move.
defm : Zn3WriteResInt<WriteSETCC, [Zn3ALU03], 1, [2], 1>; // Set register based on condition code.
defm : Zn3WriteResInt<WriteSETCCStore, [Zn3ALU03, Zn3AGU012, Zn3Store], 2, [2, 1, 1], 2>; // FIXME: latency not from llvm-exegesis
defm : Zn3WriteResInt<WriteLAHFSAHF, [Zn3ALU3], 1, [1], 1>; // Load/Store flags in AH.

defm : Zn3WriteResInt<WriteBitTest, [Zn3ALU12], 1, [1], 1>; // Bit Test
defm : Zn3WriteResInt<WriteBitTestImmLd, [Zn3AGU012, Zn3Load, Zn3ALU12], !add(Znver3Model.LoadLatency, 1), [1, 1, 1], 2>;
defm : Zn3WriteResInt<WriteBitTestRegLd, [Zn3AGU012, Zn3Load, Zn3ALU12], !add(Znver3Model.LoadLatency, 1), [1, 1, 1], 7>;

defm : Zn3WriteResInt<WriteBitTestSet, [Zn3ALU12], 2, [2], 2>; // Bit Test + Set
defm : Zn3WriteResInt<WriteBitTestSetImmLd, [Zn3AGU012, Zn3Load, Zn3ALU12], !add(Znver3Model.LoadLatency, 2), [1, 1, 1], 4>;
defm : Zn3WriteResInt<WriteBitTestSetRegLd, [Zn3AGU012, Zn3Load, Zn3ALU12], !add(Znver3Model.LoadLatency, 2), [1, 1, 1], 9>;

// Integer shifts and rotates.
defm : Zn3WriteResIntPair<WriteShift, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;
defm : Zn3WriteResIntPair<WriteShiftCL, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;
defm : Zn3WriteResIntPair<WriteRotate, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;

// RCL/RCR by 1, register operand.
def Zn3WriteRotateR1 : SchedWriteRes<[Zn3ALU12]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ResourceCycles = [2];
}
def : InstRW<[Zn3WriteRotateR1], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
                                         RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;

// RCL/RCR by 1, memory operand: register form plus load latency and one
// extra uop.
def Zn3WriteRotateM1 : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateR1.Latency);
  let NumMicroOps = !add(Zn3WriteRotateR1.NumMicroOps, 1);
  let ResourceCycles = [1, 1, 2];
}
def : InstRW<[Zn3WriteRotateM1], (instrs RCL8m1, RCL16m1, RCL32m1, RCL64m1,
                                         RCR8m1, RCR16m1, RCR32m1, RCR64m1)>;

// RCR by immediate, register operand.
def Zn3WriteRotateRightRI : SchedWriteRes<[Zn3ALU12]> {
  let Latency = 3;
  let NumMicroOps = 7;
  let ResourceCycles = [6];
}
def : InstRW<[Zn3WriteRotateRightRI], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;

// RCR by immediate, memory operand: register form plus load latency and
// three extra uops.
def Zn3WriteRotateRightMI : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateRightRI.Latency);
  let NumMicroOps = !add(Zn3WriteRotateRightRI.NumMicroOps, 3);
  let ResourceCycles = [1, 1, 8];
}
def : InstRW<[Zn3WriteRotateRightMI], (instrs RCR8mi, RCR16mi, RCR32mi, RCR64mi)>;

// RCL by immediate, register operand.
def Zn3WriteRotateLeftRI : SchedWriteRes<[Zn3ALU12]> {
  let Latency = 4;
  let NumMicroOps = 9;
  let ResourceCycles = [8];
}
def : InstRW<[Zn3WriteRotateLeftRI], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;

// RCL by immediate, memory operand: register form plus load latency and
// two extra uops.
def Zn3WriteRotateLeftMI : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateLeftRI.Latency);
  let NumMicroOps = !add(Zn3WriteRotateLeftRI.NumMicroOps, 2);
  let ResourceCycles = [1, 1, 8];
}
def : InstRW<[Zn3WriteRotateLeftMI], (instrs RCL8mi, RCL16mi, RCL32mi, RCL64mi)>;

defm : Zn3WriteResIntPair<WriteRotateCL, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;

// RCR by CL, register operand.
def Zn3WriteRotateRightRCL : SchedWriteRes<[Zn3ALU12]> {
  let Latency = 3;
  let NumMicroOps = 7;
  let ResourceCycles = [6];
}
def : InstRW<[Zn3WriteRotateRightRCL], (instrs RCR8rCL, RCR16rCL, RCR32rCL, RCR64rCL)>;

// RCR by CL, memory operand: register form plus load latency and two
// extra uops.
def Zn3WriteRotateRightMCL : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateRightRCL.Latency);
  let NumMicroOps = !add(Zn3WriteRotateRightRCL.NumMicroOps, 2);
  let ResourceCycles = [1, 1, 8];
}
def : InstRW<[Zn3WriteRotateRightMCL], (instrs RCR8mCL, RCR16mCL, RCR32mCL, RCR64mCL)>;

// RCL by CL, register operand.
def Zn3WriteRotateLeftRCL : SchedWriteRes<[Zn3ALU12]> {
  let Latency = 4;
  let NumMicroOps = 9;
  let ResourceCycles = [8];
}
def : InstRW<[Zn3WriteRotateLeftRCL], (instrs RCL8rCL, RCL16rCL, RCL32rCL, RCL64rCL)>;

// RCL by CL, memory operand: register form plus load latency and two
// extra uops.
def Zn3WriteRotateLeftMCL : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateLeftRCL.Latency);
  let NumMicroOps = !add(Zn3WriteRotateLeftRCL.NumMicroOps, 2);
  let ResourceCycles = [1, 1, 8];
}
def : InstRW<[Zn3WriteRotateLeftMCL], (instrs RCL8mCL, RCL16mCL, RCL32mCL, RCL64mCL)>;

// Double shift instructions.
defm : Zn3WriteResInt<WriteSHDrri, [Zn3ALU12], 2, [3], 4>;
defm : Zn3WriteResInt<WriteSHDrrcl, [Zn3ALU12], 2, [3], 5>;
defm : Zn3WriteResInt<WriteSHDmri, [Zn3AGU012, Zn3Load, Zn3ALU12], !add(Znver3Model.LoadLatency, 2), [1, 1, 4], 6>;
defm : Zn3WriteResInt<WriteSHDmrcl, [Zn3AGU012, Zn3Load, Zn3ALU12], !add(Znver3Model.LoadLatency, 2), [1, 1, 4], 6>;

// BMI1 BEXTR/BLS, BMI2 BZHI
defm : Zn3WriteResIntPair<WriteBEXTR, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;
defm : Zn3WriteResIntPair<WriteBLS, [Zn3ALU0123], 2, [2], 2, /*LoadUOps=*/1>;
defm : Zn3WriteResIntPair<WriteBZHI, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;

// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely, hence the zero latency
// and zero resource cycles below.
defm : Zn3WriteResInt<WriteZero, [Zn3ALU0123], 0, [0], 1>;

// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
defm : Zn3WriteResIntPair<WriteJump, [Zn3BRU01], 1, [1], 1>; // FIXME: not from llvm-exegesis

// Floating point. This covers both scalar and vector operations.
defm : Zn3WriteResInt<WriteFLD0, [Zn3FPLd01, Zn3Load, Zn3FPP1], !add(Znver3Model.LoadLatency, 4), [1, 1, 1], 1>;
defm : Zn3WriteResInt<WriteFLD1, [Zn3FPLd01, Zn3Load, Zn3FPP1], !add(Znver3Model.LoadLatency, 7), [1, 1, 1], 1>;
defm : Zn3WriteResInt<WriteFLDC, [Zn3FPLd01, Zn3Load, Zn3FPP1], !add(Znver3Model.LoadLatency, 7), [1, 1, 1], 1>;
// All FP load writes share one shape: vector load latency + 1, one uop.
defm : Zn3WriteResXMM<WriteFLoad, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResXMM<WriteFLoadX, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResYMM<WriteFLoadY, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResXMM<WriteFMaskedLoad, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResYMM<WriteFMaskedLoadY, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResXMM<WriteFStore, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;

// Store override for the listed MOVHPD/MOVHPS memory-destination forms:
// latency 2 and 2 uops instead of the generic single-uop store.
def Zn3WriteWriteFStoreMMX : SchedWriteRes<[Zn3FPSt, Zn3Store]> {
  let Latency = 2; // FIXME: not from llvm-exegesis
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}
def : InstRW<[Zn3WriteWriteFStoreMMX], (instrs MOVHPDmr, MOVHPSmr,
                                               VMOVHPDmr, VMOVHPSmr)>;

defm : Zn3WriteResXMM<WriteFStoreX, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResYMM<WriteFStoreY, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResXMM<WriteFStoreNT, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResXMM<WriteFStoreNTX, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResYMM<WriteFStoreNTY, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;

// Masked FP stores: many uops and multi-cycle occupancy of Zn3FPSt.
defm : Zn3WriteResXMM<WriteFMaskedStore32, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [6, 1], 18>;
defm : Zn3WriteResXMM<WriteFMaskedStore64, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [4, 1], 10>;
defm : Zn3WriteResYMM<WriteFMaskedStore32Y, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [12, 1], 42>;
defm : Zn3WriteResYMM<WriteFMaskedStore64Y, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [6, 1], 18>;

defm : Zn3WriteResXMMPair<WriteFAdd, [Zn3FPFAdd01], 3, [1], 1>; // Floating point add/sub.

// x87 add/sub/mul with an integer memory operand: Zn3FPU0123 is held for
// 24 cycles.
def Zn3WriteX87Arith : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, 1); // FIXME: not from llvm-exegesis
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1, 24];
}
def : InstRW<[Zn3WriteX87Arith], (instrs ADD_FI16m, ADD_FI32m,
                                         SUB_FI16m, SUB_FI32m,
                                         SUBR_FI16m, SUBR_FI32m,
                                         MUL_FI16m, MUL_FI32m)>;

// x87 divide with an integer memory operand: Zn3FPU0123 is held for
// 62 cycles.
def Zn3WriteX87Div : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, 1); // FIXME: not from llvm-exegesis
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1, 62];
}
def : InstRW<[Zn3WriteX87Div], (instrs DIV_FI16m, DIV_FI32m,
                                       DIVR_FI16m, DIVR_FI32m)>;

defm : Zn3WriteResXMMPair<WriteFAddX, [Zn3FPFAdd01], 3, [1], 1>; // Floating point add/sub (XMM).
// Remaining FP add/sub entries; the ZMM variants are marked unsupported.
defm : Zn3WriteResYMMPair<WriteFAddY, [Zn3FPFAdd01], 3, [1], 1>; // Floating point add/sub (YMM).
defm : X86WriteResPairUnsupported<WriteFAddZ>; // Floating point add/sub (ZMM).
defm : Zn3WriteResXMMPair<WriteFAdd64, [Zn3FPFAdd01], 3, [1], 1>; // Floating point double add/sub.
defm : Zn3WriteResXMMPair<WriteFAdd64X, [Zn3FPFAdd01], 3, [1], 1>; // Floating point double add/sub (XMM).
defm : Zn3WriteResYMMPair<WriteFAdd64Y, [Zn3FPFAdd01], 3, [1], 1>; // Floating point double add/sub (YMM).
defm : X86WriteResPairUnsupported<WriteFAdd64Z>; // Floating point double add/sub (ZMM).
// FP compares are modelled on Zn3FPFMul01.
defm : Zn3WriteResXMMPair<WriteFCmp, [Zn3FPFMul01], 1, [1], 1>; // Floating point compare.
defm : Zn3WriteResXMMPair<WriteFCmpX, [Zn3FPFMul01], 1, [1], 1>; // Floating point compare (XMM).
defm : Zn3WriteResYMMPair<WriteFCmpY, [Zn3FPFMul01], 1, [1], 1>; // Floating point compare (YMM).
defm : X86WriteResPairUnsupported<WriteFCmpZ>; // Floating point compare (ZMM).
defm : Zn3WriteResXMMPair<WriteFCmp64, [Zn3FPFMul01], 1, [1], 1>; // Floating point double compare.
defm : Zn3WriteResXMMPair<WriteFCmp64X, [Zn3FPFMul01], 1, [1], 1>; // Floating point double compare (XMM).
defm : Zn3WriteResYMMPair<WriteFCmp64Y, [Zn3FPFMul01], 1, [1], 1>; // Floating point double compare (YMM).
defm : X86WriteResPairUnsupported<WriteFCmp64Z>; // Floating point double compare (ZMM).
defm : Zn3WriteResXMMPair<WriteFCom, [Zn3FPFMul01], 3, [2], 1>; // FIXME: latency not from llvm-exegesis // Floating point compare to flags (X87).
defm : Zn3WriteResXMMPair<WriteFComX, [Zn3FPFMul01], 4, [2], 2>; // FIXME: latency not from llvm-exegesis // Floating point compare to flags (SSE).
// FP multiplication: latency 3 on Zn3FPFMul01; ZMM variants unsupported.
defm : Zn3WriteResXMMPair<WriteFMul, [Zn3FPFMul01], 3, [1], 1>; // Floating point multiplication.
defm : Zn3WriteResXMMPair<WriteFMulX, [Zn3FPFMul01], 3, [1], 1>; // Floating point multiplication (XMM).
defm : Zn3WriteResYMMPair<WriteFMulY, [Zn3FPFMul01], 3, [1], 1>; // Floating point multiplication (YMM).
defm : X86WriteResPairUnsupported<WriteFMulZ>; // Floating point multiplication (ZMM).
defm : Zn3WriteResXMMPair<WriteFMul64, [Zn3FPFMul01], 3, [1], 1>; // Floating point double multiplication.
defm : Zn3WriteResXMMPair<WriteFMul64X, [Zn3FPFMul01], 3, [1], 1>; // Floating point double multiplication (XMM).
defm : Zn3WriteResYMMPair<WriteFMul64Y, [Zn3FPFMul01], 3, [1], 1>; // Floating point double multiplication (YMM).
defm : X86WriteResPairUnsupported<WriteFMul64Z>; // Floating point double multiplication (ZMM).
// FP division: single precision is latency 11 with Zn3FPFDiv held 3 cycles,
// double precision is latency 13 with Zn3FPFDiv held 5 cycles.
defm : Zn3WriteResXMMPair<WriteFDiv, [Zn3FPFDiv], 11, [3], 1>; // Floating point division.
defm : Zn3WriteResXMMPair<WriteFDivX, [Zn3FPFDiv], 11, [3], 1>; // Floating point division (XMM).
defm : Zn3WriteResYMMPair<WriteFDivY, [Zn3FPFDiv], 11, [3], 1>; // Floating point division (YMM).
defm : X86WriteResPairUnsupported<WriteFDivZ>; // Floating point division (ZMM).
defm : Zn3WriteResXMMPair<WriteFDiv64, [Zn3FPFDiv], 13, [5], 1>; // Floating point double division.
defm : Zn3WriteResXMMPair<WriteFDiv64X, [Zn3FPFDiv], 13, [5], 1>; // Floating point double division (XMM).
defm : Zn3WriteResYMMPair<WriteFDiv64Y, [Zn3FPFDiv], 13, [5], 1>; // Floating point double division (YMM).
defm : X86WriteResPairUnsupported<WriteFDiv64Z>; // Floating point double division (ZMM).
// FP square root shares the Zn3FPFDiv resource with division.
defm : Zn3WriteResXMMPair<WriteFSqrt, [Zn3FPFDiv], 15, [5], 1>; // Floating point square root.
defm : Zn3WriteResXMMPair<WriteFSqrtX, [Zn3FPFDiv], 15, [5], 1>; // Floating point square root (XMM).
defm : Zn3WriteResYMMPair<WriteFSqrtY, [Zn3FPFDiv], 15, [5], 1>; // Floating point square root (YMM).
defm : X86WriteResPairUnsupported<WriteFSqrtZ>; // Floating point square root (ZMM).
defm : Zn3WriteResXMMPair<WriteFSqrt64, [Zn3FPFDiv], 21, [9], 1>; // Floating point double square root.
defm : Zn3WriteResXMMPair<WriteFSqrt64X, [Zn3FPFDiv], 21, [9], 1>; // Floating point double square root (XMM).
defm : Zn3WriteResYMMPair<WriteFSqrt64Y, [Zn3FPFDiv], 21, [9], 1>; // Floating point double square root (YMM).
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>; // Floating point double square root (ZMM).
defm : Zn3WriteResXMMPair<WriteFSqrt80, [Zn3FPFDiv], 22, [23], 1>; // FIXME: latency not from llvm-exegesis // Floating point long double square root.
// Reciprocal estimates use Zn3FPFMul01.
defm : Zn3WriteResXMMPair<WriteFRcp, [Zn3FPFMul01], 3, [1], 1>; // Floating point reciprocal estimate.
defm : Zn3WriteResXMMPair<WriteFRcpX, [Zn3FPFMul01], 3, [1], 1>; // Floating point reciprocal estimate (XMM).
defm : Zn3WriteResYMMPair<WriteFRcpY, [Zn3FPFMul01], 3, [1], 1>; // Floating point reciprocal estimate (YMM).
defm : X86WriteResPairUnsupported<WriteFRcpZ>; // Floating point reciprocal estimate (ZMM).
// Reciprocal square root estimates use Zn3FPFDiv.
defm : Zn3WriteResXMMPair<WriteFRsqrt, [Zn3FPFDiv], 3, [1], 1>; // Floating point reciprocal square root estimate.
defm : Zn3WriteResXMMPair<WriteFRsqrtX, [Zn3FPFDiv], 3, [1], 1>; // Floating point reciprocal square root estimate (XMM).
defm : Zn3WriteResYMMPair<WriteFRsqrtY, [Zn3FPFDiv], 3, [1], 1>; // Floating point reciprocal square root estimate (YMM).
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>; // Floating point reciprocal square root estimate (ZMM).
// Fused multiply-add: latency 4 on Zn3FPFMul01.
defm : Zn3WriteResXMMPair<WriteFMA, [Zn3FPFMul01], 4, [1], 1>; // Fused Multiply Add.
defm : Zn3WriteResXMMPair<WriteFMAX, [Zn3FPFMul01], 4, [1], 1>; // Fused Multiply Add (XMM).
defm : Zn3WriteResYMMPair<WriteFMAY, [Zn3FPFMul01], 4, [1], 1>; // Fused Multiply Add (YMM).
defm : X86WriteResPairUnsupported<WriteFMAZ>; // Fused Multiply Add (ZMM).
// Dot products: multi-uop, with an explicit uop count for the load forms.
defm : Zn3WriteResXMMPair<WriteDPPD, [Zn3FPFMul01], 9, [6], 3, /*LoadUOps=*/2>; // Floating point double dot product.
defm : Zn3WriteResXMMPair<WriteDPPS, [Zn3FPFMul01], 15, [8], 8, /*LoadUOps=*/2>; // Floating point single dot product.
defm : Zn3WriteResYMMPair<WriteDPPSY, [Zn3FPFMul01], 15, [8], 7, /*LoadUOps=*/1>; // Floating point single dot product (YMM).
defm : Zn3WriteResXMMPair<WriteFSign, [Zn3FPFMul01], 1, [2], 1>; // FIXME: latency not from llvm-exegesis // Floating point fabs/fchs.
defm : Zn3WriteResXMMPair<WriteFRnd, [Zn3FPFCvt01], 3, [1], 1>; // Floating point rounding.
defm : Zn3WriteResYMMPair<WriteFRndY, [Zn3FPFCvt01], 3, [1], 1>; // Floating point rounding (YMM).
defm : X86WriteResPairUnsupported<WriteFRndZ>; // Floating point rounding (ZMM).
defm : Zn3WriteResXMMPair<WriteFLogic, [Zn3FPVMisc0123], 1, [1], 1>; // Floating point and/or/xor logicals.
defm : Zn3WriteResYMMPair<WriteFLogicY, [Zn3FPVMisc0123], 1, [1], 1>; // Floating point and/or/xor logicals (YMM).
defm : X86WriteResPairUnsupported<WriteFLogicZ>; // Floating point and/or/xor logicals (ZMM).
defm : Zn3WriteResXMMPair<WriteFTest, [Zn3FPFMisc12], 1, [2], 2>; // FIXME: latency not from llvm-exegesis // Floating point TEST instructions.
defm : Zn3WriteResYMMPair<WriteFTestY, [Zn3FPFMisc12], 1, [2], 2>; // FIXME: latency not from llvm-exegesis // Floating point TEST instructions (YMM).
defm : X86WriteResPairUnsupported<WriteFTestZ>; // Floating point TEST instructions (ZMM).
// Shuffles: fixed shuffles are latency 1, variable shuffles latency 3,
// both on Zn3FPVShuf01.
defm : Zn3WriteResXMMPair<WriteFShuffle, [Zn3FPVShuf01], 1, [1], 1>; // Floating point vector shuffles.
defm : Zn3WriteResYMMPair<WriteFShuffleY, [Zn3FPVShuf01], 1, [1], 1>; // Floating point vector shuffles (YMM).
defm : X86WriteResPairUnsupported<WriteFShuffleZ>; // Floating point vector shuffles (ZMM).
defm : Zn3WriteResXMMPair<WriteFVarShuffle, [Zn3FPVShuf01], 3, [1], 1>; // Floating point vector variable shuffles.
defm : Zn3WriteResYMMPair<WriteFVarShuffleY, [Zn3FPVShuf01], 3, [1], 1>; // Floating point vector variable shuffles (YMM).
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>; // Floating point vector variable shuffles (ZMM).
// Blends (fixed and variable) are latency 1 on Zn3FPFMul01.
defm : Zn3WriteResXMMPair<WriteFBlend, [Zn3FPFMul01], 1, [1], 1>; // Floating point vector blends.
defm : Zn3WriteResYMMPair<WriteFBlendY, [Zn3FPFMul01], 1, [1], 1>; // Floating point vector blends (YMM).
defm : X86WriteResPairUnsupported<WriteFBlendZ>; // Floating point vector blends (ZMM).
defm : Zn3WriteResXMMPair<WriteFVarBlend, [Zn3FPFMul01], 1, [1], 1>; // Fp vector variable blends.
defm : Zn3WriteResYMMPair<WriteFVarBlendY, [Zn3FPFMul01], 1, [1], 1>; // Fp vector variable blends (YMM).
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>; // Fp vector variable blends (ZMM).

// Horizontal Add/Sub (float and integer)
defm : Zn3WriteResXMMPair<WriteFHAdd, [Zn3FPFAdd0], 6, [2], 4>;
defm : Zn3WriteResYMMPair<WriteFHAddY, [Zn3FPFAdd0], 6, [2], 3, /*LoadUOps=*/1>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;
defm : Zn3WriteResXMMPair<WritePHAdd, [Zn3FPVAdd0], 2, [2], 3, /*LoadUOps=*/1>;
defm : Zn3WriteResXMMPair<WritePHAddX, [Zn3FPVAdd0], 2, [2], 4>;
defm : Zn3WriteResYMMPair<WritePHAddY, [Zn3FPVAdd0], 2, [2], 3, /*LoadUOps=*/1>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;

// Vector integer operations.
// All vector loads share one shape: vector load latency + 1, one uop.
defm : Zn3WriteResXMM<WriteVecLoad, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResXMM<WriteVecLoadX, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResYMM<WriteVecLoadY, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResXMM<WriteVecLoadNT, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResYMM<WriteVecLoadNTY, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResXMM<WriteVecMaskedLoad, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResYMM<WriteVecMaskedLoadY, [Zn3FPLd01, Zn3Load], !add(Znver3Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn3WriteResXMM<WriteVecStore, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResXMM<WriteVecStoreX, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;

// Register forms of VEXTRACTF128/VEXTRACTI128: latency 4, one uop on
// Zn3FPFMisc0. The two memory-form writes below derive from this record.
def Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr : SchedWriteRes<[Zn3FPFMisc0]> {
  let Latency = 4;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def : InstRW<[Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr], (instrs VEXTRACTF128rr, VEXTRACTI128rr)>;

// Memory-destination VEXTRACT: register form plus one uop.
// NOTE(review): this write uses store resources (Zn3FPSt, Zn3Store) but its
// latency adds LoadLatency rather than StoreLatency - confirm this is
// intended.
def Zn3WriteVEXTRACTI128mr : SchedWriteRes<[Zn3FPFMisc0, Zn3FPSt, Zn3Store]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
  let NumMicroOps = !add(Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 1);
  let ResourceCycles = [1, 1, 1];
}
def : InstRW<[Zn3WriteVEXTRACTI128mr], (instrs VEXTRACTI128mr, VEXTRACTF128mr)>;

// VINSERTF128 with a memory source: load latency on top of the VEXTRACT
// register form, with the same uop count (the !add of 0 is deliberate in the
// original).
def Zn3WriteVINSERTF128rmr : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPFMisc0]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
  let NumMicroOps = !add(Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 0);
  let ResourceCycles = [1, 1, 1];
}
def : InstRW<[Zn3WriteVINSERTF128rmr], (instrs VINSERTF128rm)>;

defm : Zn3WriteResYMM<WriteVecStoreY, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResXMM<WriteVecStoreNT, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
defm : Zn3WriteResYMM<WriteVecStoreNTY, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [1, 1], 1>;
// Masked vector stores use the same numbers as the masked FP stores.
defm : Zn3WriteResXMM<WriteVecMaskedStore32, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [6, 1], 18>;
defm : Zn3WriteResXMM<WriteVecMaskedStore64, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [4, 1], 10>;
defm : Zn3WriteResYMM<WriteVecMaskedStore32Y, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [12, 1], 42>;
defm : Zn3WriteResYMM<WriteVecMaskedStore64Y, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency, [6, 1], 18>;

defm : Zn3WriteResXMM<WriteVecMoveToGpr, [Zn3FPLd01], 1, [2], 1>;
defm : Zn3WriteResXMM<WriteVecMoveFromGpr, [Zn3FPLd01], 1, [2], 1>;

// MMX <-> XMM quadword moves: 2 uops, Zn3FPFMisc0123 held for 2 cycles.
def Zn3WriteMOVMMX : SchedWriteRes<[Zn3FPLd01, Zn3FPFMisc0123]> {
  let Latency = 1;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 2];
}
def : InstRW<[Zn3WriteMOVMMX], (instrs MMX_MOVQ2FR64rr, MMX_MOVQ2DQrr)>;

// Slower MMX moves: same as above but Zn3FPFMisc0123 is held for 4 cycles.
def Zn3WriteMOVMMXSlow : SchedWriteRes<[Zn3FPLd01, Zn3FPFMisc0123]> {
  let Latency = 1;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 4];
}
def : InstRW<[Zn3WriteMOVMMXSlow], (instrs MMX_MOVD64rr, MMX_MOVD64to64rr)>;

defm : Zn3WriteResXMMPair<WriteVecALU, [Zn3FPVAdd0123], 1, [1], 1>; // Vector integer ALU op, no logicals.
// EXTRQ and INSERTQ: latency 3, one cycle each on Zn3FPVShuf01 and Zn3FPLd01,
// a single micro-op.
def Zn3WriteEXTRQ_INSERTQ : SchedWriteRes<[Zn3FPVShuf01, Zn3FPLd01]> {
  let Latency = 3;
  let ResourceCycles = [1, 1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteEXTRQ_INSERTQ], (instrs EXTRQ, INSERTQ)>;

// EXTRQI and INSERTQI: same latency and resources as above, but two micro-ops.
def Zn3WriteEXTRQI_INSERTQI : SchedWriteRes<[Zn3FPVShuf01, Zn3FPLd01]> {
  let Latency = 3;
  let ResourceCycles = [1, 1];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteEXTRQI_INSERTQI], (instrs EXTRQI, INSERTQI)>;

defm : Zn3WriteResXMMPair<WriteVecALUX, [Zn3FPVAdd0123], 1, [1], 1>; // Vector integer ALU op, no logicals (XMM).

// XMM integer ALU instructions bound to Zn3FPVAdd01 instead of the
// Zn3FPVAdd0123 group used by WriteVecALUX above; latency and micro-op count
// are otherwise the same.
def Zn3WriteVecALUXSlow : SchedWriteRes<[Zn3FPVAdd01]> {
  let Latency = 1;
  let ResourceCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteVecALUXSlow], (instrs PABSBrr, PABSDrr, PABSWrr,
                                            PADDSBrr, PADDSWrr, PADDUSBrr, PADDUSWrr,
                                            PAVGBrr, PAVGWrr,
                                            PSIGNBrr, PSIGNDrr, PSIGNWrr,
                                            VPABSBrr, VPABSDrr, VPABSWrr,
                                            VPADDSBrr, VPADDSWrr, VPADDUSBrr, VPADDUSWrr,
                                            VPAVGBrr, VPAVGWrr,
                                            VPCMPEQQrr,
                                            VPSIGNBrr, VPSIGNDrr, VPSIGNWrr,
                                            PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr, VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr)>;

// MMX counterparts of the list above, with the same Zn3FPVAdd01 binding.
def Zn3WriteVecALUXMMX : SchedWriteRes<[Zn3FPVAdd01]> {
  let Latency = 1;
  let ResourceCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteVecALUXMMX], (instrs MMX_PABSBrr, MMX_PABSDrr, MMX_PABSWrr,
                                           MMX_PSIGNBrr, MMX_PSIGNDrr, MMX_PSIGNWrr,
                                           MMX_PADDSBrr, MMX_PADDSWrr, MMX_PADDUSBrr, MMX_PADDUSWrr,
                                           MMX_PAVGBrr, MMX_PAVGWrr,
                                           MMX_PSUBSBrr, MMX_PSUBSWrr, MMX_PSUBUSBrr, MMX_PSUBUSWrr)>;

defm : Zn3WriteResYMMPair<WriteVecALUY, [Zn3FPVAdd0123], 1, [1], 1>; // Vector integer ALU op, no logicals (YMM).

// YMM counterparts of the Zn3WriteVecALUXSlow list, again bound to
// Zn3FPVAdd01.
def Zn3WriteVecALUYSlow : SchedWriteRes<[Zn3FPVAdd01]> {
  let Latency = 1;
  let ResourceCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteVecALUYSlow], (instrs VPABSBYrr, VPABSDYrr, VPABSWYrr,
                                            VPADDSBYrr, VPADDSWYrr, VPADDUSBYrr, VPADDUSWYrr,
                                            VPSUBSBYrr, VPSUBSWYrr, VPSUBUSBYrr, VPSUBUSWYrr,
                                            VPAVGBYrr, VPAVGWYrr,
                                            VPCMPEQQYrr,
                                            VPSIGNBYrr, VPSIGNDYrr, VPSIGNWYrr)>;

defm : X86WriteResPairUnsupported<WriteVecALUZ>; // Vector integer ALU op, no logicals (ZMM).
defm : Zn3WriteResXMMPair<WriteVecLogic, [Zn3FPVMisc0123], 1, [1], 1>; // Vector integer and/or/xor logicals.
defm : Zn3WriteResXMMPair<WriteVecLogicX, [Zn3FPVMisc0123], 1, [1], 1>; // Vector integer and/or/xor logicals (XMM).
defm : Zn3WriteResYMMPair<WriteVecLogicY, [Zn3FPVMisc0123], 1, [1], 1>; // Vector integer and/or/xor logicals (YMM).
defm : X86WriteResPairUnsupported<WriteVecLogicZ>; // Vector integer and/or/xor logicals (ZMM).
defm : Zn3WriteResXMMPair<WriteVecTest, [Zn3FPVAdd12, Zn3FPSt], 1, [1, 1], 2>; // FIXME: latency not from llvm-exegesis // Vector integer TEST instructions.
// Vector integer test, shift, immediate-shift, multiply and PMULLD write
// classes. Each family has a default/XMM entry and a YMM entry with identical
// parameters; the ZMM class of every family is marked unsupported.
defm : Zn3WriteResYMMPair<WriteVecTestY, [Zn3FPVAdd12, Zn3FPSt], 1, [1, 1], 2>; // FIXME: latency not from llvm-exegesis // Vector integer TEST instructions (YMM).
defm : X86WriteResPairUnsupported<WriteVecTestZ>; // Vector integer TEST instructions (ZMM).
defm : Zn3WriteResXMMPair<WriteVecShift, [Zn3FPVShift01], 1, [1], 1>; // Vector integer shifts (default).
defm : Zn3WriteResXMMPair<WriteVecShiftX, [Zn3FPVShift01], 1, [1], 1>; // Vector integer shifts (XMM).
defm : Zn3WriteResYMMPair<WriteVecShiftY, [Zn3FPVShift01], 1, [1], 1>; // Vector integer shifts (YMM).
defm : X86WriteResPairUnsupported<WriteVecShiftZ>; // Vector integer shifts (ZMM).
defm : Zn3WriteResXMMPair<WriteVecShiftImm, [Zn3FPVShift01], 1, [1], 1>; // Vector integer immediate shifts (default).
defm : Zn3WriteResXMMPair<WriteVecShiftImmX, [Zn3FPVShift01], 1, [1], 1>; // Vector integer immediate shifts (XMM).
defm : Zn3WriteResYMMPair<WriteVecShiftImmY, [Zn3FPVShift01], 1, [1], 1>; // Vector integer immediate shifts (YMM).
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>; // Vector integer immediate shifts (ZMM).
defm : Zn3WriteResXMMPair<WriteVecIMul, [Zn3FPVMul01], 3, [1], 1>; // Vector integer multiply (default).
defm : Zn3WriteResXMMPair<WriteVecIMulX, [Zn3FPVMul01], 3, [1], 1>; // Vector integer multiply (XMM).
defm : Zn3WriteResYMMPair<WriteVecIMulY, [Zn3FPVMul01], 3, [1], 1>; // Vector integer multiply (YMM).
defm : X86WriteResPairUnsupported<WriteVecIMulZ>; // Vector integer multiply (ZMM).
defm : Zn3WriteResXMMPair<WritePMULLD, [Zn3FPVMul01], 3, [1], 1>; // Vector PMULLD.
defm : Zn3WriteResYMMPair<WritePMULLDY, [Zn3FPVMul01], 3, [1], 1>; // Vector PMULLD (YMM).
// Shuffle, blend and PSADBW write classes. Fixed and variable shuffles share
// Zn3FPVShuf01; fixed blends use Zn3FPVMisc0123 while variable blends use
// Zn3FPVMul01. The ZMM class of every family is marked unsupported.
defm : X86WriteResPairUnsupported<WritePMULLDZ>; // Vector PMULLD (ZMM).
defm : Zn3WriteResXMMPair<WriteShuffle, [Zn3FPVShuf01], 1, [1], 1>; // Vector shuffles.
defm : Zn3WriteResXMMPair<WriteShuffleX, [Zn3FPVShuf01], 1, [1], 1>; // Vector shuffles (XMM).
defm : Zn3WriteResYMMPair<WriteShuffleY, [Zn3FPVShuf01], 1, [1], 1>; // Vector shuffles (YMM).
defm : X86WriteResPairUnsupported<WriteShuffleZ>; // Vector shuffles (ZMM).
defm : Zn3WriteResXMMPair<WriteVarShuffle, [Zn3FPVShuf01], 1, [1], 1>; // Vector variable shuffles.
defm : Zn3WriteResXMMPair<WriteVarShuffleX, [Zn3FPVShuf01], 1, [1], 1>; // Vector variable shuffles (XMM).
defm : Zn3WriteResYMMPair<WriteVarShuffleY, [Zn3FPVShuf01], 1, [1], 1>; // Vector variable shuffles (YMM).
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>; // Vector variable shuffles (ZMM).
defm : Zn3WriteResXMMPair<WriteBlend, [Zn3FPVMisc0123], 1, [1], 1>; // Vector blends.
defm : Zn3WriteResYMMPair<WriteBlendY, [Zn3FPVMisc0123], 1, [1], 1>; // Vector blends (YMM).
defm : X86WriteResPairUnsupported<WriteBlendZ>; // Vector blends (ZMM).
defm : Zn3WriteResXMMPair<WriteVarBlend, [Zn3FPVMul01], 1, [1], 1>; // Vector variable blends.
defm : Zn3WriteResYMMPair<WriteVarBlendY, [Zn3FPVMul01], 1, [1], 1>; // Vector variable blends (YMM).
defm : X86WriteResPairUnsupported<WriteVarBlendZ>; // Vector variable blends (ZMM).
defm : Zn3WriteResXMMPair<WritePSADBW, [Zn3FPVAdd0123], 3, [2], 1>; // Vector PSADBW.
defm : Zn3WriteResXMMPair<WritePSADBWX, [Zn3FPVAdd0123], 3, [2], 1>; // Vector PSADBW (XMM).
defm : Zn3WriteResYMMPair<WritePSADBWY, [Zn3FPVAdd0123], 3, [2], 1>; // Vector PSADBW (YMM).
// PSADBW (ZMM), MPSAD and PHMINPOS. The XMM and YMM MPSAD entries differ in
// their micro-op and LoadUOps arguments (4 and 2 versus 3 and 1).
defm : X86WriteResPairUnsupported<WritePSADBWZ>; // Vector PSADBW (ZMM).
defm : Zn3WriteResXMMPair<WriteMPSAD, [Zn3FPVAdd0123], 4, [8], 4, /*LoadUOps=*/2>; // Vector MPSAD.
defm : Zn3WriteResYMMPair<WriteMPSADY, [Zn3FPVAdd0123], 4, [8], 3, /*LoadUOps=*/1>; // Vector MPSAD (YMM).
defm : X86WriteResPairUnsupported<WriteMPSADZ>; // Vector MPSAD (ZMM).
defm : Zn3WriteResXMMPair<WritePHMINPOS, [Zn3FPVAdd01], 3, [1], 1>; // Vector PHMINPOS.

// Vector insert/extract operations.
// The extract-and-store entry adds one cycle to Znver3Model.StoreLatency and
// uses the store resources instead of Zn3FPLd01.
defm : Zn3WriteResXMMPair<WriteVecInsert, [Zn3FPLd01], 1, [2], 2, /*LoadUOps=*/-1>; // Insert gpr to vector element.
defm : Zn3WriteResXMM<WriteVecExtract, [Zn3FPLd01], 1, [2], 2>; // Extract vector element to gpr.
defm : Zn3WriteResXMM<WriteVecExtractSt, [Zn3FPSt, Zn3Store], !add(1, Znver3Model.StoreLatency), [1, 1], 2>; // Extract vector element and store.

// MOVMSK operations.
// All four variants are bound to the single Zn3FPVMisc2 resource.
defm : Zn3WriteResXMM<WriteFMOVMSK, [Zn3FPVMisc2], 1, [1], 1>;
defm : Zn3WriteResXMM<WriteVecMOVMSK, [Zn3FPVMisc2], 1, [1], 1>;
defm : Zn3WriteResYMM<WriteVecMOVMSKY, [Zn3FPVMisc2], 1, [1], 1>;
defm : Zn3WriteResXMM<WriteMMXMOVMSK, [Zn3FPVMisc2], 1, [1], 1>;

// Conversion between integer and float.
defm : Zn3WriteResXMMPair<WriteCvtSD2I, [Zn3FPFCvt01], 2, [2], 2>; // Double -> Integer.
defm : Zn3WriteResXMMPair<WriteCvtPD2I, [Zn3FPFCvt01], 3, [1], 1>; // Double -> Integer (XMM).
defm : Zn3WriteResYMMPair<WriteCvtPD2IY, [Zn3FPFCvt01], 6, [2], 2>; // Double -> Integer (YMM).
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>; // Double -> Integer (ZMM).
// MMX double -> integer conversions (MMX_CVTPD2PI / MMX_CVTTPD2PI).
// NOTE(review): the rm forms are bound to this record as well, yet it lists
// no load resources and no load latency - confirm this is intentional.
def Zn3WriteCvtPD2IMMX : SchedWriteRes<[Zn3FPFCvt01]> {
  let Latency = 1;
  let ResourceCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteCvtPD2IMMX], (instrs MMX_CVTPD2PIrm, MMX_CVTTPD2PIrm, MMX_CVTPD2PIrr, MMX_CVTTPD2PIrr)>;

defm : Zn3WriteResXMMPair<WriteCvtSS2I, [Zn3FPFCvt01], 2, [2], 2>; // Float -> Integer.

defm : Zn3WriteResXMMPair<WriteCvtPS2I, [Zn3FPFCvt01], 3, [1], 1>; // Float -> Integer (XMM).
defm : Zn3WriteResYMMPair<WriteCvtPS2IY, [Zn3FPFCvt01], 3, [1], 1>; // Float -> Integer (YMM).
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>; // Float -> Integer (ZMM).

defm : Zn3WriteResXMMPair<WriteCvtI2SD, [Zn3FPFCvt01], 3, [2], 2, /*LoadUOps=*/-1>; // Integer -> Double.
defm : Zn3WriteResXMMPair<WriteCvtI2PD, [Zn3FPFCvt01], 3, [1], 1>; // Integer -> Double (XMM).
defm : Zn3WriteResYMMPair<WriteCvtI2PDY, [Zn3FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // Integer -> Double (YMM).
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>; // Integer -> Double (ZMM).

// MMX integer -> double conversion (MMX_CVTPI2PD): Zn3FPFCvt01 is held for
// six cycles.
// NOTE(review): the rm form shares this record, which lists no load
// resources - confirm this is intentional.
def Zn3WriteCvtI2PDMMX : SchedWriteRes<[Zn3FPFCvt01]> {
  let Latency = 2;
  let ResourceCycles = [6];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteCvtI2PDMMX], (instrs MMX_CVTPI2PDrm, MMX_CVTPI2PDrr)>;

defm : Zn3WriteResXMMPair<WriteCvtI2SS, [Zn3FPFCvt01], 3, [2], 2, /*LoadUOps=*/-1>; // Integer -> Float.
defm : Zn3WriteResXMMPair<WriteCvtI2PS, [Zn3FPFCvt01], 3, [1], 1>; // Integer -> Float (XMM).
defm : Zn3WriteResYMMPair<WriteCvtI2PSY, [Zn3FPFCvt01], 3, [1], 1>; // Integer -> Float (YMM).
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>; // Integer -> Float (ZMM).

// MMX integer -> float conversion; only the register form MMX_CVTPI2PSrr is
// bound to this record.
def Zn3WriteCvtI2PSMMX : SchedWriteRes<[Zn3FPFCvt01]> {
  let Latency = 3;
  let ResourceCycles = [1];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteCvtI2PSMMX], (instrs MMX_CVTPI2PSrr)>;

// Floating-point size conversions. All entries use Zn3FPFCvt01; the ZMM class
// of every family is marked unsupported.
defm : Zn3WriteResXMMPair<WriteCvtSS2SD, [Zn3FPFCvt01], 3, [1], 1>; // Float -> Double size conversion.
defm : Zn3WriteResXMMPair<WriteCvtPS2PD, [Zn3FPFCvt01], 3, [1], 1>; // Float -> Double size conversion (XMM).
defm : Zn3WriteResYMMPair<WriteCvtPS2PDY, [Zn3FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // Float -> Double size conversion (YMM).
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>; // Float -> Double size conversion (ZMM).

defm : Zn3WriteResXMMPair<WriteCvtSD2SS, [Zn3FPFCvt01], 3, [1], 1>; // Double -> Float size conversion.
defm : Zn3WriteResXMMPair<WriteCvtPD2PS, [Zn3FPFCvt01], 3, [1], 1>; // Double -> Float size conversion (XMM).
defm : Zn3WriteResYMMPair<WriteCvtPD2PSY, [Zn3FPFCvt01], 6, [2], 2>; // Double -> Float size conversion (YMM).
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>; // Double -> Float size conversion (ZMM).

defm : Zn3WriteResXMMPair<WriteCvtPH2PS, [Zn3FPFCvt01], 3, [1], 1>; // Half -> Float size conversion.
defm : Zn3WriteResYMMPair<WriteCvtPH2PSY, [Zn3FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // Half -> Float size conversion (YMM).
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>; // Half -> Float size conversion (ZMM).
// Float -> Half conversions. The store variants add Zn3FPSt and Zn3Store and
// extend the latency by Znver3Model.StoreLatency.
defm : Zn3WriteResXMM<WriteCvtPS2PH, [Zn3FPFCvt01], 3, [2], 1>; // Float -> Half size conversion.
defm : Zn3WriteResYMM<WriteCvtPS2PHY, [Zn3FPFCvt01], 6, [2], 2>; // Float -> Half size conversion (YMM).
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>; // Float -> Half size conversion (ZMM).
defm : Zn3WriteResXMM<WriteCvtPS2PHSt, [Zn3FPFCvt01, Zn3FPSt, Zn3Store], !add(3, Znver3Model.StoreLatency), [1, 1, 1], 2>; // Float -> Half + store size conversion.
defm : Zn3WriteResYMM<WriteCvtPS2PHYSt, [Zn3FPFCvt01, Zn3FPSt, Zn3Store], !add(6, Znver3Model.StoreLatency), [2, 1, 1], 3>; // Float -> Half + store size conversion (YMM).
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>; // Float -> Half + store size conversion (ZMM).

// CRC32 instruction.
defm : Zn3WriteResIntPair<WriteCRC32, [Zn3ALU1], 3, [1], 1>;

// SHA instructions. Each register form is a standalone record on Zn3FPU0123;
// each memory form adds Zn3AGU012 and Zn3Load and computes its latency as
// Znver3Model.LoadLatency plus the register form's latency.
def Zn3WriteSHA1MSG1rr : SchedWriteRes<[Zn3FPU0123]> {
  let Latency = 2;
  let ResourceCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteSHA1MSG1rr], (instrs SHA1MSG1rr)>;

def Zn3WriteSHA1MSG1rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA1MSG1rr.Latency);
  let ResourceCycles = [1, 1, 2];
  let NumMicroOps = !add(Zn3WriteSHA1MSG1rr.NumMicroOps, 0);
}
def : InstRW<[Zn3WriteSHA1MSG1rm], (instrs SHA1MSG1rm)>;

def Zn3WriteSHA1MSG2rr_SHA1NEXTErr : SchedWriteRes<[Zn3FPU0123]> {
  let Latency = 1;
  let ResourceCycles = [2];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteSHA1MSG2rr_SHA1NEXTErr], (instrs SHA1MSG2rr, SHA1NEXTErr)>;

// NOTE(review): this record and Zn3Writerm_SHA256MSG1rm below are named
// "Zn3Writerm_..." whereas the sibling records follow "Zn3Write<Instr>";
// consider aligning the names if nothing else references them.
def Zn3Writerm_SHA1MSG2rm_SHA1NEXTErm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
  let ResourceCycles = [1, 1, 2];
  let NumMicroOps = !add(Zn3WriteSHA1MSG2rr_SHA1NEXTErr.NumMicroOps, 0);
}
def : InstRW<[Zn3Writerm_SHA1MSG2rm_SHA1NEXTErm], (instrs SHA1MSG2rm, SHA1NEXTErm)>;

def Zn3WriteSHA256MSG1rr : SchedWriteRes<[Zn3FPU0123]> {
  let Latency = 2;
  let ResourceCycles = [3];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteSHA256MSG1rr], (instrs SHA256MSG1rr)>;

def Zn3Writerm_SHA256MSG1rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA256MSG1rr.Latency);
  let ResourceCycles = [1, 1, 3];
  let NumMicroOps = !add(Zn3WriteSHA256MSG1rr.NumMicroOps, 0);
}
def : InstRW<[Zn3Writerm_SHA256MSG1rm], (instrs SHA256MSG1rm)>;

def Zn3WriteSHA256MSG2rr : SchedWriteRes<[Zn3FPU0123]> {
  let Latency = 3;
  let ResourceCycles = [8];
  let NumMicroOps = 4;
}
def : InstRW<[Zn3WriteSHA256MSG2rr], (instrs SHA256MSG2rr)>;

// Unlike the other SHA memory forms in this group, this one adds a micro-op
// (+1) on top of its register form.
def Zn3WriteSHA256MSG2rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA256MSG2rr.Latency);
  let ResourceCycles = [1, 1, 8];
  let NumMicroOps = !add(Zn3WriteSHA256MSG2rr.NumMicroOps, 1);
}
def : InstRW<[Zn3WriteSHA256MSG2rm], (instrs SHA256MSG2rm)>;

def Zn3WriteSHA1RNDS4rri : SchedWriteRes<[Zn3FPU0123]> {
  let Latency = 6;
  let ResourceCycles = [8];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteSHA1RNDS4rri], (instrs SHA1RNDS4rri)>;

def Zn3WriteSHA256RNDS2rr : SchedWriteRes<[Zn3FPU0123]> {
  let Latency = 4;
  let ResourceCycles = [8];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteSHA256RNDS2rr], (instrs SHA256RNDS2rr)>;

// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
defm : Zn3WriteResXMMPair<WritePCmpIStrM, [Zn3FPVAdd0123], 6, [8], 3, /*LoadUOps=*/1>;
// Packed Compare Explicit Length Strings, Return Mask
defm : Zn3WriteResXMMPair<WritePCmpEStrM, [Zn3FPVAdd0123], 6, [12], 7, /*LoadUOps=*/5>;
// Packed Compare Implicit Length Strings, Return Index
defm : Zn3WriteResXMMPair<WritePCmpIStrI, [Zn3FPVAdd0123], 2, [8], 4>;
// Packed Compare Explicit Length Strings, Return Index
defm : Zn3WriteResXMMPair<WritePCmpEStrI, [Zn3FPVAdd0123], 6, [12], 8, /*LoadUOps=*/4>;

// AES instructions.
defm : Zn3WriteResXMMPair<WriteAESDecEnc, [Zn3FPAES01], 4, [1], 1>; // Decryption, encryption.
defm : Zn3WriteResXMMPair<WriteAESIMC, [Zn3FPAES01], 4, [1], 1>; // InvMixColumn.
defm : Zn3WriteResXMMPair<WriteAESKeyGen, [Zn3FPAES01], 4, [1], 1>; // Key Generation.

// Carry-less multiplication instructions.
defm : Zn3WriteResXMMPair<WriteCLMul, [Zn3FPCLM01], 4, [4], 4>;

// EMMS/FEMMS
defm : Zn3WriteResInt<WriteEMMS, [Zn3ALU0123], 2, [1], 1>; // FIXME: latency not from llvm-exegesis

// Load/store MXCSR
// The load form builds on Znver3Model.LoadLatency, the store form on
// Znver3Model.StoreLatency; the store form holds Zn3ALU0123 for 60 cycles.
defm : Zn3WriteResInt<WriteLDMXCSR, [Zn3AGU012, Zn3Load, Zn3ALU0123], !add(Znver3Model.LoadLatency, 1), [1, 1, 6], 1>; // FIXME: latency not from llvm-exegesis
defm : Zn3WriteResInt<WriteSTMXCSR, [Zn3ALU0123, Zn3AGU012, Zn3Store], !add(1, Znver3Model.StoreLatency), [60, 1, 1], 2>; // FIXME: latency not from llvm-exegesis

// Catch-all for expensive system instructions.
defm : Zn3WriteResInt<WriteSystem, [Zn3ALU0123], 100, [100], 100>;

// VZEROUPPER is modeled with zero latency and a single micro-op.
def Zn3WriteVZEROUPPER : SchedWriteRes<[Zn3FPU0123]> {
  let Latency = 0; // FIXME: not from llvm-exegesis
  let ResourceCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteVZEROUPPER], (instrs VZEROUPPER)>;

// VZEROALL is modeled as 18 micro-ops that hold Zn3FPU0123 for 24 cycles.
def Zn3WriteVZEROALL : SchedWriteRes<[Zn3FPU0123]> {
  let Latency = 10; // FIXME: not from llvm-exegesis
  let ResourceCycles = [24];
  let NumMicroOps = 18;
}
def : InstRW<[Zn3WriteVZEROALL], (instrs VZEROALL)>;

// AVX2.
defm : Zn3WriteResYMMPair<WriteFShuffle256, [Zn3FPVShuf], 2, [1], 1, /*LoadUOps=*/2>; // Fp 256-bit width vector shuffles.
defm : Zn3WriteResYMMPair<WriteFVarShuffle256, [Zn3FPVShuf], 7, [1], 2, /*LoadUOps=*/1>; // Fp 256-bit width variable shuffles.
defm : Zn3WriteResYMMPair<WriteShuffle256, [Zn3FPVShuf], 2, [1], 1>; // 256-bit width vector shuffles.

// VPERM2I128rr / VPERM2F128rr: latency 3, a single micro-op on Zn3FPVShuf.
def Zn3WriteVPERM2I128rr_VPERM2F128rr : SchedWriteRes<[Zn3FPVShuf]> {
  let Latency = 3;
  let ResourceCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteVPERM2I128rr_VPERM2F128rr], (instrs VPERM2I128rr, VPERM2F128rr)>;

// VPERM2F128rm: the register form above plus a folded load; the micro-op
// count is unchanged (+0).
def Zn3WriteVPERM2F128rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVPERM2I128rr_VPERM2F128rr.Latency);
  let ResourceCycles = [1, 1, 1];
  let NumMicroOps = !add(Zn3WriteVPERM2I128rr_VPERM2F128rr.NumMicroOps, 0);
}
def : InstRW<[Zn3WriteVPERM2F128rm], (instrs VPERM2F128rm)>;

// VPERMPSYrm: load latency plus a literal 7 cycles, three micro-ops.
def Zn3WriteVPERMPSYrm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
  let Latency = !add(Znver3Model.LoadLatency, 7);
  let ResourceCycles = [1, 1, 2];
  let NumMicroOps = 3;
}
def : InstRW<[Zn3WriteVPERMPSYrm], (instrs VPERMPSYrm)>;

// VPERMPDYri / VPERMQYri: latency 6, two micro-ops.
def Zn3WriteVPERMYri : SchedWriteRes<[Zn3FPVShuf]> {
  let Latency = 6;
  let ResourceCycles = [1];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteVPERMYri], (instrs VPERMPDYri, VPERMQYri)>;

// VPERMPDYmi: Zn3WriteVPERMYri plus a folded load and one extra micro-op.
def Zn3WriteVPERMPDYmi : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
  let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVPERMYri.Latency);
  let ResourceCycles = [1, 1, 2];
  let NumMicroOps = !add(Zn3WriteVPERMYri.NumMicroOps, 1);
}
def : InstRW<[Zn3WriteVPERMPDYmi], (instrs VPERMPDYmi)>;

// Shared by VPERMQYmi and VPERMDYrm: load latency plus a literal 5 cycles,
// two micro-ops.
def Zn3WriteVPERMDYm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
  let Latency = !add(Znver3Model.LoadLatency, 5);
  let ResourceCycles = [1, 1, 2];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteVPERMDYm], (instrs VPERMQYmi, VPERMDYrm)>;

defm : Zn3WriteResYMMPair<WriteVPMOV256, [Zn3FPVShuf01], 4, [3], 2, /*LoadUOps=*/-1>; // 256-bit width packed vector width-changing move.
defm : Zn3WriteResYMMPair<WriteVarShuffle256, [Zn3FPVShuf], 5, [1], 2, /*LoadUOps=*/1>; // 256-bit width vector variable shuffles.
defm : Zn3WriteResXMMPair<WriteVarVecShift, [Zn3FPVShift01], 1, [1], 1>; // Variable vector shifts.
defm : Zn3WriteResYMMPair<WriteVarVecShiftY, [Zn3FPVShift01], 1, [1], 1>; // Variable vector shifts (YMM).
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>; // Variable vector shifts (ZMM).

// Old microcoded instructions that nobody uses.
defm : Zn3WriteResInt<WriteMicrocoded, [Zn3ALU0123], 100, [100], 100>;

// Fence instructions.
// Generic fence write: latency 1, but Zn3ALU0123 is held for 100 cycles.
defm : Zn3WriteResInt<WriteFence, [Zn3ALU0123], 1, [100], 1>;

// LFENCE overrides the generic fence: Zn3LSU is held for 30 cycles.
def Zn3WriteLFENCE : SchedWriteRes<[Zn3LSU]> {
  let Latency = 1;
  let ResourceCycles = [30];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteLFENCE], (instrs LFENCE)>;

// SFENCE overrides the generic fence: Zn3LSU is held for a single cycle.
def Zn3WriteSFENCE : SchedWriteRes<[Zn3LSU]> {
  let Latency = 1;
  let ResourceCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteSFENCE], (instrs SFENCE)>;

// Nop, not very useful except that it provides a model for nops!
defm : Zn3WriteResInt<WriteNop, [Zn3ALU0123], 0, [1], 1>; // FIXME: latency not from llvm-exegesis


///////////////////////////////////////////////////////////////////////////////
// Zero Cycle Move
///////////////////////////////////////////////////////////////////////////////

// Write with zero latency that consumes no processor resources; it still
// counts as one micro-op.
def Zn3WriteZeroLatency : SchedWriteRes<[]> {
  let Latency = 0;
  let ResourceCycles = [];
  let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteZeroLatency], (instrs MOV32rr, MOV32rr_REV,
                                            MOV64rr, MOV64rr_REV,
                                            MOVSX32rr32)>;

// Same as Zn3WriteZeroLatency, but two micro-ops; bound to the 32- and 64-bit
// register XCHG forms.
def Zn3WriteSwapRenameable : SchedWriteRes<[]> {
  let Latency = 0;
  let ResourceCycles = [];
  let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteSwapRenameable], (instrs XCHG32rr, XCHG32ar,
                                               XCHG64rr, XCHG64ar)>;
defm : Zn3WriteResInt<WriteXCHG, [Zn3ALU0123], 0, [8], 2>; // Compare+Exchange - TODO RMW support.

// Floating-point register moves: the XMM and YMM classes consume no resources
// and have zero latency; the ZMM class is marked unsupported.
defm : Zn3WriteResXMM<WriteFMove, [Zn3FPVMisc0123], 1, [1], 1>; // Empty sched class
defm : Zn3WriteResXMM<WriteFMoveX, [], 0, [], 1>;
defm : Zn3WriteResYMM<WriteFMoveY, [], 0, [], 1>;
defm : X86WriteResUnsupported<WriteFMoveZ>;

// Vector integer register moves, following the same pattern as the
// floating-point moves above.
defm : Zn3WriteResXMM<WriteVecMove, [Zn3FPFMisc0123], 1, [1], 1>; // MMX
defm : Zn3WriteResXMM<WriteVecMoveX, [], 0, [], 1>;
defm : Zn3WriteResYMM<WriteVecMoveY, [], 0, [], 1>;
defm : X86WriteResUnsupported<WriteVecMoveZ>;

// Register moves this model declares optimizable. The single equivalence
// class is guarded by TruePred, i.e. no additional condition is attached.
def : IsOptimizableRegisterMove<[
  InstructionEquivalenceClass<[
    // GPR variants.
    MOV32rr, MOV32rr_REV,
    MOV64rr, MOV64rr_REV,
    MOVSX32rr32,
    XCHG32rr, XCHG32ar,
    XCHG64rr, XCHG64ar,

    // MMX variants.
    // MMX moves are *NOT* eliminated.

    // SSE variants.
    MOVAPSrr, MOVAPSrr_REV,
    MOVUPSrr, MOVUPSrr_REV,
    MOVAPDrr, MOVAPDrr_REV,
    MOVUPDrr, MOVUPDrr_REV,
    MOVDQArr, MOVDQArr_REV,
    MOVDQUrr, MOVDQUrr_REV,

    // AVX variants.
    VMOVAPSrr, VMOVAPSrr_REV,
    VMOVUPSrr, VMOVUPSrr_REV,
    VMOVAPDrr, VMOVAPDrr_REV,
    VMOVUPDrr, VMOVUPDrr_REV,
    VMOVDQArr, VMOVDQArr_REV,
    VMOVDQUrr, VMOVDQUrr_REV,

    // AVX YMM variants.
    VMOVAPSYrr, VMOVAPSYrr_REV,
    VMOVUPSYrr, VMOVUPSYrr_REV,
    VMOVAPDYrr, VMOVAPDYrr_REV,
    VMOVUPDYrr, VMOVUPDYrr_REV,
    VMOVDQAYrr, VMOVDQAYrr_REV,
    VMOVDQUYrr, VMOVDQUYrr_REV,
  ], TruePred >
]>;

///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////

// GPR XOR/SUB: resolves to Zn3WriteZeroLatency when ZeroIdiomPredicate holds,
// otherwise to the regular WriteALU.
def Zn3WriteZeroIdiom : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
    SchedVar<NoSchedPred, [WriteALU]>
]>;
def : InstRW<[Zn3WriteZeroIdiom], (instrs XOR32rr, XOR32rr_REV,
                                          XOR64rr, XOR64rr_REV,
                                          SUB32rr, SUB32rr_REV,
                                          SUB64rr, SUB64rr_REV)>;

// CMP whose operands 0 and 1 are the same register resolves to
// Zn3WriteZeroLatency; any other CMP uses WriteALU.
def Zn3WriteZeroIdiomEFLAGS : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<CheckSameRegOperand<0, 1>>, [Zn3WriteZeroLatency]>,
    SchedVar<NoSchedPred, [WriteALU]>
]>;
def : InstRW<[Zn3WriteZeroIdiomEFLAGS], (instrs CMP8rr, CMP8rr_REV,
                                                CMP16rr, CMP16rr_REV,
                                                CMP32rr, CMP32rr_REV,
                                                CMP64rr, CMP64rr_REV)>;

// FP logic (XMM): zero latency under ZeroIdiomPredicate, WriteFLogic
// otherwise.
def Zn3WriteFZeroIdiom : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
    SchedVar<NoSchedPred, [WriteFLogic]>
]>;
// NOTE: XORPSrr, XORPDrr are not zero-cycle!
def : InstRW<[Zn3WriteFZeroIdiom], (instrs VXORPSrr, VXORPDrr,
                                           VANDNPSrr, VANDNPDrr)>;

// FP logic, Y-suffixed opcodes: resolves to Zn3WriteZeroLatency when
// ZeroIdiomPredicate holds, and to WriteFLogicY otherwise.
def Zn3WriteFZeroIdiomY : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteFLogicY]>
]>;
def : InstRW<[Zn3WriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
                                            VANDNPSYrr, VANDNPDYrr)>;

// Vector-integer logic: resolves to Zn3WriteZeroLatency when
// ZeroIdiomPredicate holds, and to WriteVecLogicX otherwise.
def Zn3WriteVZeroIdiomLogicX : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteVecLogicX]>
]>;
// NOTE: PXORrr,PANDNrr are not zero-cycle!
def : InstRW<[Zn3WriteVZeroIdiomLogicX], (instrs VPXORrr, VPANDNrr)>;

// Vector-integer logic, Y-suffixed opcodes: falls back to WriteVecLogicY when
// ZeroIdiomPredicate does not hold.
def Zn3WriteVZeroIdiomLogicY : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteVecLogicY]>
]>;
def : InstRW<[Zn3WriteVZeroIdiomLogicY], (instrs VPXORYrr, VPANDNYrr)>;

// Vector-integer ALU (subtract / compare-greater-than): resolves to
// Zn3WriteZeroLatency when ZeroIdiomPredicate holds, and to WriteVecALUX
// otherwise.
def Zn3WriteVZeroIdiomALUX : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteVecALUX]>
]>;
// NOTE: PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
//       PCMPGTBrr, PCMPGTWrr, PCMPGTDrr, PCMPGTQrr are not zero-cycle!
def : InstRW<[Zn3WriteVZeroIdiomALUX],
             (instrs VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
                     VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr)>;

// Vector-integer ALU, Y-suffixed opcodes: resolves to Zn3WriteZeroLatency when
// ZeroIdiomPredicate holds, and to WriteVecALUY otherwise.
def Zn3WriteVZeroIdiomALUY : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteVecALUY]>
]>;
def : InstRW<[Zn3WriteVZeroIdiomALUY],
             (instrs VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
                     VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr)>;

// Opcodes registered as zero idioms. Every class below is guarded by
// ZeroIdiomPredicate. Note that this list also names the non-VEX SSE opcodes
// and the saturating subtracts, which have no zero-latency InstRW override in
// this section.
def : IsZeroIdiomFunction<[
  // GPR Zero-idioms.
  DepBreakingClass<[ XOR32rr, XOR32rr_REV,
                     XOR64rr, XOR64rr_REV,
                     SUB32rr, SUB32rr_REV,
                     SUB64rr, SUB64rr_REV ], ZeroIdiomPredicate>,

  // SSE XMM Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    XORPSrr, XORPDrr,
    ANDNPSrr, ANDNPDrr,

    // int variants.
    PXORrr,
    PANDNrr,
    PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
    PSUBSBrr, PSUBSWrr,
    PSUBUSBrr, PSUBUSWrr,
    PCMPGTBrr, PCMPGTWrr, PCMPGTDrr, PCMPGTQrr
  ], ZeroIdiomPredicate>,

  // AVX XMM Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    VXORPSrr, VXORPDrr,
    VANDNPSrr, VANDNPDrr,

    // int variants.
    VPXORrr,
    VPANDNrr,
    VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
    VPSUBSBrr, VPSUBSWrr,
    VPSUBUSBrr, VPSUBUSWrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
  ], ZeroIdiomPredicate>,

  // AVX YMM Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    VXORPSYrr, VXORPDYrr,
    VANDNPSYrr, VANDNPDYrr,

    // int variants.
    VPXORYrr,
    VPANDNYrr,
    VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
    VPSUBSBYrr, VPSUBSWYrr,
    VPSUBUSBYrr, VPSUBUSWYrr,
    VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr
  ], ZeroIdiomPredicate>,
]>;

// Opcodes registered as dependency-breaking. The CMP class is guarded by
// CheckSameRegOperand<0, 1>; every other class uses ZeroIdiomPredicate.
def : IsDepBreakingFunction<[
  // GPR
  DepBreakingClass<[ SBB32rr, SBB32rr_REV,
                     SBB64rr, SBB64rr_REV ], ZeroIdiomPredicate>,
  DepBreakingClass<[ CMP8rr,  CMP8rr_REV,
                     CMP16rr, CMP16rr_REV,
                     CMP32rr, CMP32rr_REV,
                     CMP64rr, CMP64rr_REV ], CheckSameRegOperand<0, 1> >,

  // MMX
  DepBreakingClass<[
    MMX_PCMPEQBrr, MMX_PCMPEQWrr, MMX_PCMPEQDrr
  ], ZeroIdiomPredicate>,

  // SSE
  DepBreakingClass<[
    PCMPEQBrr, PCMPEQWrr, PCMPEQDrr, PCMPEQQrr
  ], ZeroIdiomPredicate>,

  // AVX XMM
  DepBreakingClass<[
    VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr, VPCMPEQQrr
  ], ZeroIdiomPredicate>,

  // AVX YMM
  DepBreakingClass<[
    VPCMPEQBYrr, VPCMPEQWYrr, VPCMPEQDYrr, VPCMPEQQYrr
  ], ZeroIdiomPredicate>,
]>;

} // SchedModel