//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a target description file for the Intel i386 architecture, referred
// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//

// Get the target-independent interfaces which we are implementing...
//
include "llvm/Target/Target.td"

//===----------------------------------------------------------------------===//
// X86 Subtarget state
//
// Every record in this file instantiates SubtargetFeature with, in order:
//   <name, field, value, description[, implied-features]>
// NOTE(review): the argument meanings come from the SubtargetFeature class in
// the included Target.td, which is not visible here -- confirm against it.

// Operating-mode selectors; each one sets its own In*BitMode field.
def Mode64Bit : SubtargetFeature<
    "64bit-mode", "In64BitMode", "true",
    "64-bit mode (x86_64)">;
def Mode32Bit : SubtargetFeature<
    "32bit-mode", "In32BitMode", "true",
    "32-bit mode (80386)">;
def Mode16Bit : SubtargetFeature<
    "16bit-mode", "In16BitMode", "true",
    "16-bit mode (i8086)">;

//===----------------------------------------------------------------------===//
// X86 Subtarget features
//===----------------------------------------------------------------------===//

def FeatureX87 : SubtargetFeature<
    "x87", "HasX87", "true",
    "Enable X87 float instructions">;

def FeatureNOPL : SubtargetFeature<
    "nopl", "HasNOPL", "true",
    "Enable NOPL instruction">;

def FeatureCMOV : SubtargetFeature<
    "cmov", "HasCMov", "true",
    "Enable conditional move instructions">;

def FeatureCMPXCHG8B : SubtargetFeature<
    "cx8", "HasCmpxchg8b", "true",
    "Support CMPXCHG8B instructions">;

def FeaturePOPCNT : SubtargetFeature<
    "popcnt", "HasPOPCNT", "true",
    "Support POPCNT instruction">;

def FeatureFXSR : SubtargetFeature<
    "fxsr", "HasFXSR", "true",
    "Support fxsave/fxrestore instructions">;

// XSAVE family: every variant below lists plain XSAVE as an implied feature.
def FeatureXSAVE : SubtargetFeature<
    "xsave", "HasXSAVE", "true",
    "Support xsave instructions">;

def FeatureXSAVEOPT : SubtargetFeature<
    "xsaveopt", "HasXSAVEOPT", "true",
    "Support xsaveopt instructions",
    [FeatureXSAVE]>;

def FeatureXSAVEC : SubtargetFeature<
    "xsavec", "HasXSAVEC", "true",
    "Support xsavec instructions",
    [FeatureXSAVE]>;

def FeatureXSAVES : SubtargetFeature<
    "xsaves", "HasXSAVES", "true",
    "Support xsaves instructions",
    [FeatureXSAVE]>;

// SSE levels: all of these write the single X86SSELevel field, and each level
// lists the previous one as implied (SSE1 <- SSE2 <- SSE3 <- SSSE3 <- SSE4.1
// <- SSE4.2; the AVX levels further down continue the same chain).
def FeatureSSE1 : SubtargetFeature<
    "sse", "X86SSELevel", "SSE1",
    "Enable SSE instructions">;
def FeatureSSE2 : SubtargetFeature<
    "sse2", "X86SSELevel", "SSE2",
    "Enable SSE2 instructions",
    [FeatureSSE1]>;
def FeatureSSE3 : SubtargetFeature<
    "sse3", "X86SSELevel", "SSE3",
    "Enable SSE3 instructions",
    [FeatureSSE2]>;
def FeatureSSSE3 : SubtargetFeature<
    "ssse3", "X86SSELevel", "SSSE3",
    "Enable SSSE3 instructions",
    [FeatureSSE3]>;
def FeatureSSE41 : SubtargetFeature<
    "sse4.1", "X86SSELevel", "SSE41",
    "Enable SSE 4.1 instructions",
    [FeatureSSSE3]>;
def FeatureSSE42 : SubtargetFeature<
    "sse4.2", "X86SSELevel", "SSE42",
    "Enable SSE 4.2 instructions",
    [FeatureSSE41]>;
// The MMX subtarget feature is separate from the rest of the SSE features
// because it's important (for odd compatibility reasons) to be able to
// turn it off explicitly while allowing SSE+ to be on.
def FeatureMMX : SubtargetFeature<
    "mmx", "X863DNowLevel", "MMX",
    "Enable MMX instructions">;
def Feature3DNow : SubtargetFeature<
    "3dnow", "X863DNowLevel", "ThreeDNow",
    "Enable 3DNow! instructions",
    [FeatureMMX]>;
def Feature3DNowA : SubtargetFeature<
    "3dnowa", "X863DNowLevel", "ThreeDNowA",
    "Enable 3DNow! Athlon instructions",
    [Feature3DNow]>;
// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode. Nothing should imply this feature bit. It
// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
def Feature64Bit : SubtargetFeature<
    "64bit", "HasX86_64", "true",
    "Support 64-bit instructions">;
def FeatureCMPXCHG16B : SubtargetFeature<
    "cx16", "HasCmpxchg16b", "true",
    "64-bit with cmpxchg16b",
    [FeatureCMPXCHG8B]>;
def FeatureSlowSHLD : SubtargetFeature<
    "slow-shld", "IsSHLDSlow", "true",
    "SHLD instruction is slow">;
def FeatureSlowPMULLD : SubtargetFeature<
    "slow-pmulld", "IsPMULLDSlow", "true",
    "PMULLD instruction is slow">;
def FeatureSlowPMADDWD : SubtargetFeature<
    "slow-pmaddwd", "IsPMADDWDSlow", "true",
    "PMADDWD is slower than PMULLD">;
// FIXME: This should not apply to CPUs that do not have SSE.
def FeatureSlowUAMem16 : SubtargetFeature<
    "slow-unaligned-mem-16", "IsUAMem16Slow", "true",
    "Slow unaligned 16-byte memory access">;
def FeatureSlowUAMem32 : SubtargetFeature<
    "slow-unaligned-mem-32", "IsUAMem32Slow", "true",
    "Slow unaligned 32-byte memory access">;
def FeatureSSE4A : SubtargetFeature<
    "sse4a", "HasSSE4A", "true",
    "Support SSE 4a instructions",
    [FeatureSSE3]>;

// AVX levels continue the X86SSELevel chain started by the SSE features.
def FeatureAVX : SubtargetFeature<
    "avx", "X86SSELevel", "AVX",
    "Enable AVX instructions",
    [FeatureSSE42]>;
def FeatureAVX2 : SubtargetFeature<
    "avx2", "X86SSELevel", "AVX2",
    "Enable AVX2 instructions",
    [FeatureAVX]>;
// Description fixed: FMA is fused multiply-add (was misspelled "multiple-add").
def FeatureFMA : SubtargetFeature<
    "fma", "HasFMA", "true",
    "Enable three-operand fused multiply-add",
    [FeatureAVX]>;
def FeatureF16C : SubtargetFeature<
    "f16c", "HasF16C", "true",
    "Support 16-bit floating point conversion instructions",
    [FeatureAVX]>;
def FeatureAVX512 : SubtargetFeature<
    "avx512f", "X86SSELevel", "AVX512F",
    "Enable AVX-512 instructions",
    [FeatureAVX2, FeatureFMA, FeatureF16C]>;
def FeatureERI : SubtargetFeature<
    "avx512er", "HasERI", "true",
    "Enable AVX-512 Exponential and Reciprocal Instructions",
    [FeatureAVX512]>;
def FeatureCDI : SubtargetFeature<
    "avx512cd", "HasCDI", "true",
    "Enable AVX-512 Conflict Detection Instructions",
    [FeatureAVX512]>;
def FeatureVPOPCNTDQ : SubtargetFeature<
    "avx512vpopcntdq", "HasVPOPCNTDQ", "true",
    "Enable AVX-512 Population Count Instructions",
    [FeatureAVX512]>;
def FeaturePFI : SubtargetFeature<
    "avx512pf", "HasPFI", "true",
    "Enable AVX-512 PreFetch Instructions",
    [FeatureAVX512]>;
def FeaturePREFETCHWT1 : SubtargetFeature<
    "prefetchwt1", "HasPREFETCHWT1", "true",
    "Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI : SubtargetFeature<
    "avx512dq", "HasDQI", "true",
    "Enable AVX-512 Doubleword and Quadword Instructions",
    [FeatureAVX512]>;
def FeatureBWI : SubtargetFeature<
    "avx512bw", "HasBWI", "true",
    "Enable AVX-512 Byte and Word Instructions",
    [FeatureAVX512]>;
def FeatureVLX : SubtargetFeature<
    "avx512vl", "HasVLX", "true",
    "Enable AVX-512 Vector Length eXtensions",
    [FeatureAVX512]>;
def FeatureVBMI : SubtargetFeature<
    "avx512vbmi", "HasVBMI", "true",
    "Enable AVX-512 Vector Byte Manipulation Instructions",
    [FeatureBWI]>;
def FeatureVBMI2 : SubtargetFeature<
    "avx512vbmi2", "HasVBMI2", "true",
    "Enable AVX-512 further Vector Byte Manipulation Instructions",
    [FeatureBWI]>;
// Description fixed: "Multiply-Add" (was misspelled "Multiple-Add").
def FeatureIFMA : SubtargetFeature<
    "avx512ifma", "HasIFMA", "true",
    "Enable AVX-512 Integer Fused Multiply-Add",
    [FeatureAVX512]>;
def FeaturePKU : SubtargetFeature<
    "pku", "HasPKU", "true",
    "Enable protection keys">;
def FeatureVNNI : SubtargetFeature<
    "avx512vnni", "HasVNNI", "true",
    "Enable AVX-512 Vector Neural Network Instructions",
    [FeatureAVX512]>;
def FeatureAVXVNNI : SubtargetFeature<
    "avxvnni", "HasAVXVNNI", "true",
    "Support AVX_VNNI encoding",
    [FeatureAVX2]>;
def FeatureBF16 : SubtargetFeature<
    "avx512bf16", "HasBF16", "true",
    "Support bfloat16 floating point",
    [FeatureBWI]>;
def FeatureBITALG : SubtargetFeature<
    "avx512bitalg", "HasBITALG", "true",
    "Enable AVX-512 Bit Algorithms",
    [FeatureBWI]>;
def FeatureVP2INTERSECT : SubtargetFeature<
    "avx512vp2intersect", "HasVP2INTERSECT", "true",
    "Enable AVX-512 vp2intersect",
    [FeatureAVX512]>;
def FeaturePCLMUL : SubtargetFeature<
    "pclmul", "HasPCLMUL", "true",
    "Enable packed carry-less multiplication instructions",
    [FeatureSSE2]>;
def FeatureGFNI : SubtargetFeature<
    "gfni", "HasGFNI", "true",
    "Enable Galois Field Arithmetic Instructions",
    [FeatureSSE2]>;
def FeatureVPCLMULQDQ : SubtargetFeature<
    "vpclmulqdq", "HasVPCLMULQDQ", "true",
    "Enable vpclmulqdq instructions",
    [FeatureAVX, FeaturePCLMUL]>;
// Description fixed: "multiply-add" (was misspelled "multiple-add").
def FeatureFMA4 : SubtargetFeature<
    "fma4", "HasFMA4", "true",
    "Enable four-operand fused multiply-add",
    [FeatureAVX, FeatureSSE4A]>;
def FeatureXOP : SubtargetFeature<
    "xop", "HasXOP", "true",
    "Enable XOP instructions",
    [FeatureFMA4]>;
def FeatureSSEUnalignedMem : SubtargetFeature<
    "sse-unaligned-mem", "HasSSEUnalignedMem", "true",
    "Allow unaligned memory operands with SSE instructions">;
def FeatureAES : SubtargetFeature<
    "aes", "HasAES", "true",
    "Enable AES instructions",
    [FeatureSSE2]>;
def FeatureVAES : SubtargetFeature<
    "vaes", "HasVAES", "true",
    "Promote selected AES instructions to AVX512/AVX registers",
    [FeatureAVX, FeatureAES]>;
def FeatureTBM : SubtargetFeature<
    "tbm", "HasTBM", "true",
    "Enable TBM instructions">;
def FeatureLWP : SubtargetFeature<
    "lwp", "HasLWP", "true",
    "Enable LWP instructions">;
def FeatureMOVBE : SubtargetFeature<
    "movbe", "HasMOVBE", "true",
    "Support MOVBE instruction">;
def FeatureRDRAND : SubtargetFeature<
    "rdrnd", "HasRDRAND", "true",
    "Support RDRAND instruction">;
def FeatureFSGSBase : SubtargetFeature<
    "fsgsbase", "HasFSGSBase", "true",
    "Support FS/GS Base instructions">;
def FeatureLZCNT : SubtargetFeature<
    "lzcnt", "HasLZCNT", "true",
    "Support LZCNT instruction">;
def FeatureBMI : SubtargetFeature<
    "bmi", "HasBMI", "true",
    "Support BMI instructions">;
def FeatureBMI2 : SubtargetFeature<
    "bmi2", "HasBMI2", "true",
    "Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<
    "rtm", "HasRTM", "true",
    "Support RTM instructions">;
def FeatureADX : SubtargetFeature<
    "adx", "HasADX", "true",
    "Support ADX instructions">;
def FeatureSHA : SubtargetFeature<
    "sha", "HasSHA", "true",
    "Enable SHA instructions",
    [FeatureSSE2]>;
def FeatureSHSTK : SubtargetFeature<
    "shstk", "HasSHSTK", "true",
    "Support CET Shadow-Stack instructions">;
def FeaturePRFCHW : SubtargetFeature<
    "prfchw", "HasPRFCHW", "true",
    "Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<
    "rdseed", "HasRDSEED", "true",
    "Support RDSEED instruction">;
def FeatureLAHFSAHF : SubtargetFeature<
    "sahf", "HasLAHFSAHF64", "true",
    "Support LAHF and SAHF instructions in 64-bit mode">;
def FeatureMWAITX : SubtargetFeature<
    "mwaitx", "HasMWAITX", "true",
    "Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO : SubtargetFeature<
    "clzero", "HasCLZERO", "true",
    "Enable Cache Line Zero">;
def FeatureCLDEMOTE : SubtargetFeature<
    "cldemote", "HasCLDEMOTE", "true",
    "Enable Cache Demote">;
def FeaturePTWRITE : SubtargetFeature<
    "ptwrite", "HasPTWRITE", "true",
    "Support ptwrite instruction">;
def FeatureAMXTILE : SubtargetFeature<
    "amx-tile", "HasAMXTILE", "true",
    "Support AMX-TILE instructions">;
def FeatureAMXINT8 : SubtargetFeature<
    "amx-int8", "HasAMXINT8", "true",
    "Support AMX-INT8 instructions",
    [FeatureAMXTILE]>;
def FeatureAMXBF16 : SubtargetFeature<
    "amx-bf16", "HasAMXBF16", "true",
    "Support AMX-BF16 instructions",
    [FeatureAMXTILE]>;
def FeatureLEAForSP : SubtargetFeature<
    "lea-sp", "UseLeaForSP", "true",
    "Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide32 : SubtargetFeature<
    "idivl-to-divb", "HasSlowDivide32", "true",
    "Use 8-bit divide for positive values less than 256">;
def FeatureSlowDivide64 : SubtargetFeature<
    "idivq-to-divl", "HasSlowDivide64", "true",
    "Use 32-bit divide for positive values less than 2^32">;
def FeaturePadShortFunctions : SubtargetFeature<
    "pad-short-functions", "PadShortFunctions", "true",
    "Pad short functions">;
def FeatureINVPCID : SubtargetFeature<
    "invpcid", "HasINVPCID", "true",
    "Invalidate Process-Context Identifier">;
def FeatureSGX : SubtargetFeature<
    "sgx", "HasSGX", "true",
    "Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT : SubtargetFeature<
    "clflushopt", "HasCLFLUSHOPT", "true",
    "Flush A Cache Line Optimized">;
def FeatureCLWB : SubtargetFeature<
    "clwb", "HasCLWB", "true",
    "Cache Line Write Back">;
def FeatureWBNOINVD : SubtargetFeature<
    "wbnoinvd", "HasWBNOINVD", "true",
    "Write Back No Invalidate">;
def FeatureRDPID : SubtargetFeature<
    "rdpid", "HasRDPID", "true",
    "Support RDPID instructions">;
def FeatureWAITPKG : SubtargetFeature<
    "waitpkg", "HasWAITPKG", "true",
    "Wait and pause enhancements">;
def FeatureENQCMD : SubtargetFeature<
    "enqcmd", "HasENQCMD", "true",
    "Has ENQCMD instructions">;
def FeatureKL : SubtargetFeature<
    "kl", "HasKL", "true",
    "Support Key Locker kl Instructions",
    [FeatureSSE2]>;
def FeatureWIDEKL : SubtargetFeature<
    "widekl", "HasWIDEKL", "true",
    "Support Key Locker wide Instructions",
    [FeatureKL]>;
def FeatureHRESET : SubtargetFeature<
    "hreset", "HasHRESET", "true",
    "Has hreset instruction">;
def FeatureSERIALIZE : SubtargetFeature<
    "serialize", "HasSERIALIZE", "true",
    "Has serialize instruction">;
def FeatureTSXLDTRK : SubtargetFeature<
    "tsxldtrk", "HasTSXLDTRK", "true",
    "Support TSXLDTRK instructions">;
def FeatureUINTR : SubtargetFeature<
    "uintr", "HasUINTR", "true",
    "Has UINTR Instructions">;
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
def FeatureSlowTwoMemOps : SubtargetFeature<
    "slow-two-mem-ops", "SlowTwoMemOps", "true",
    "Two memory operand instructions are slow">;
def FeatureLEAUsesAG : SubtargetFeature<
    "lea-uses-ag", "LEAUsesAG", "true",
    "LEA instruction needs inputs at AG stage">;
def FeatureSlowLEA : SubtargetFeature<
    "slow-lea", "SlowLEA", "true",
    "LEA instruction with certain arguments is slow">;
def FeatureSlow3OpsLEA : SubtargetFeature<
    "slow-3ops-lea", "Slow3OpsLEA", "true",
    "LEA instruction with 3 ops or certain registers is slow">;
def FeatureSlowIncDec : SubtargetFeature<
    "slow-incdec", "SlowIncDec", "true",
    "INC and DEC instructions are slower than ADD and SUB">;
def FeatureSoftFloat : SubtargetFeature<
    "soft-float", "UseSoftFloat", "true",
    "Use software floating point features">;
def FeaturePOPCNTFalseDeps : SubtargetFeature<
    "false-deps-popcnt", "HasPOPCNTFalseDeps", "true",
    "POPCNT has a false dependency on dest register">;
def FeatureLZCNTFalseDeps : SubtargetFeature<
    "false-deps-lzcnt-tzcnt", "HasLZCNTFalseDeps", "true",
    "LZCNT/TZCNT have a false dependency on dest register">;
def FeaturePCONFIG : SubtargetFeature<
    "pconfig", "HasPCONFIG", "true",
    "platform configuration instruction">;
// On recent X86 (port bound) processors, it is preferable to combine to a
// single shuffle using a variable mask over multiple fixed shuffles.
def FeatureFastVariableShuffle : SubtargetFeature<
    "fast-variable-shuffle", "HasFastVariableShuffle", "true",
    "Shuffles with variable masks are fast">;
// On some X86 processors, a vzeroupper instruction should be inserted after
// using ymm/zmm registers before executing code that may use SSE instructions.
def FeatureInsertVZEROUPPER : SubtargetFeature<
    "vzeroupper", "InsertVZEROUPPER", "true",
    "Should insert vzeroupper instructions">;

// Square-root tuning. Enable FeatureFastScalarFSQRT when scalar FSQRT has
// shorter latency than the equivalent Newton-Raphson (NR) sequence, and
// FeatureFastVectorFSQRT when vector FSQRT has higher throughput than the NR
// sequence. Rationale: throughput-bound code is likely to be vectorized, so
// vector code should optimize for SQRT throughput; code that stayed scalar
// probably carries some dependency, so reducing latency matters more there.
def FeatureFastScalarFSQRT : SubtargetFeature<
    "fast-scalar-fsqrt", "HasFastScalarFSQRT", "true",
    "Scalar SQRT is fast (disable Newton-Raphson)">;
def FeatureFastVectorFSQRT : SubtargetFeature<
    "fast-vector-fsqrt", "HasFastVectorFSQRT", "true",
    "Vector SQRT is fast (disable Newton-Raphson)">;

// When lzcnt is as cheap (latency/throughput) as most simple integer ops, it
// can stand in for test/set sequences.
def FeatureFastLZCNT : SubtargetFeature<
    "fast-lzcnt", "HasFastLZCNT", "true",
    "LZCNT instructions are as fast as most simple integer ops">;

// If the target can efficiently decode NOPs up to 7-bytes in length.
def FeatureFast7ByteNOP : SubtargetFeature<
    "fast-7bytenop", "HasFast7ByteNOP", "true",
    "Target can quickly decode up to 7 byte NOPs">;

// If the target can efficiently decode NOPs up to 11-bytes in length.
def FeatureFast11ByteNOP : SubtargetFeature<
    "fast-11bytenop", "HasFast11ByteNOP", "true",
    "Target can quickly decode up to 11 byte NOPs">;

// If the target can efficiently decode NOPs up to 15-bytes in length.
def FeatureFast15ByteNOP : SubtargetFeature<
    "fast-15bytenop", "HasFast15ByteNOP", "true",
    "Target can quickly decode up to 15 byte NOPs">;

// On Sandy Bridge and newer, a rotate can be implemented as SHLD with the same
// source on both inputs, which avoids the partial flag update done by the
// normal rotate instructions.
def FeatureFastSHLDRotate : SubtargetFeature<
    "fast-shld-rotate", "HasFastSHLDRotate", "true",
    "SHLD can be used as a faster rotate">;

// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"); see "REP String Enhancement" in the Intel Software
// Development Manual. In essence REP MOVSB then copies using the largest
// available size rather than byte by byte, which makes it at least as fast as
// REPMOVS{W,D,Q}.
def FeatureERMSB : SubtargetFeature<
    "ermsb", "HasERMSB", "true",
    "REP MOVS/STOS are fast">;

// Fast Short REP MOV, available on Icelake and newer processors.
def FeatureFSRM : SubtargetFeature<
    "fsrm", "HasFSRM", "true",
    "REP MOVSB of short lengths is faster">;

// Bulldozer and newer processors can merge CMP/TEST (and no other
// instructions) with conditional branches.
def FeatureBranchFusion : SubtargetFeature<
    "branchfusion", "HasBranchFusion", "true",
    "CMP/TEST can be fused with conditional branches">;

// On Sandy Bridge and newer, many instructions can be fused with a conditional
// branch and then pass through the CPU as a single operation.
def FeatureMacroFusion : SubtargetFeature<
    "macrofusion", "HasMacroFusion", "true",
    "Various instructions can be fused with conditional branches">;

// Gather is available since Haswell (AVX2 set). So technically, we can
// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
// Skylake Client processor has faster Gathers than HSW and performance is
// similar to Skylake Server (AVX-512).
def FeatureHasFastGather : SubtargetFeature<
    "fast-gather", "HasFastGather", "true",
    "Indicates if gather is reasonably fast">;

// Vector-width and mask-register preferences.
def FeaturePrefer128Bit : SubtargetFeature<
    "prefer-128-bit", "Prefer128Bit", "true",
    "Prefer 128-bit AVX instructions">;

def FeaturePrefer256Bit : SubtargetFeature<
    "prefer-256-bit", "Prefer256Bit", "true",
    "Prefer 256-bit AVX instructions">;

def FeaturePreferMaskRegisters : SubtargetFeature<
    "prefer-mask-registers", "PreferMaskRegisters", "true",
    "Prefer AVX512 mask registers over PTEST/MOVMSK">;

// Indirect calls are lowered through a special construct called a `retpoline`
// to mitigate potential Spectre v2 attacks against them.
def FeatureRetpolineIndirectCalls : SubtargetFeature<
    "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
    "Remove speculation of indirect calls from the generated code">;

// Indirect branches and switches are lowered either with conditional branch
// trees or with a `retpoline`, to mitigate potential Spectre v2 attacks
// against them.
def FeatureRetpolineIndirectBranches : SubtargetFeature<
    "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
    "Remove speculation of indirect branches from the generated code">;

// Deprecated umbrella feature: turns on both `retpoline-indirect-calls` and
// `retpoline-indirect-branches` above via its implied-feature list.
def FeatureRetpoline : SubtargetFeature<
    "retpoline", "DeprecatedUseRetpoline", "true",
    "Remove speculation of indirect branches from the "
    "generated code, either by avoiding them entirely or "
    "lowering them with a speculation blocking construct",
    [FeatureRetpolineIndirectCalls, FeatureRetpolineIndirectBranches]>;

// Use external thunks for the emitted retpoline calls, so that users can
// supply their own thunk definitions in highly specialized environments such
// as a kernel that does boot-time hot patching.
def FeatureRetpolineExternalThunk : SubtargetFeature<
    "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
    "When lowering an indirect call or branch using a `retpoline`, rely "
    "on the specified user provided thunk rather than emitting one "
    "ourselves. Only has effect when combined with some other retpoline "
    "feature",
    [FeatureRetpolineIndirectCalls]>;

// LVI mitigation for indirect calls/branches and call returns.
def FeatureLVIControlFlowIntegrity : SubtargetFeature<
    "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
    "Prevent indirect calls/branches from using a memory operand, and "
    "precede all indirect calls/branches from a register with an "
    "LFENCE instruction to serialize control flow. Also decompose RET "
    "instructions into a POP+LFENCE+JMP sequence.">;

// SESES: mitigation for speculative execution attacks; lists `lvi-cfi` as an
// implied feature.
def FeatureSpeculativeExecutionSideEffectSuppression : SubtargetFeature<
    "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
    "Prevent speculative execution side channel timing attacks by "
    "inserting a speculation barrier before memory reads, memory writes, "
    "and conditional branches. Implies LVI Control Flow integrity.",
    [FeatureLVIControlFlowIntegrity]>;

// LVI mitigation for data loads.
def FeatureLVILoadHardening : SubtargetFeature<
    "lvi-load-hardening", "UseLVILoadHardening", "true",
    "Insert LFENCE instructions to prevent data speculatively injected "
    "into loads from being used maliciously.">;

// Direct Move instructions.
def FeatureMOVDIRI : SubtargetFeature<
    "movdiri", "HasMOVDIRI", "true",
    "Support movdiri instruction">;
def FeatureMOVDIR64B : SubtargetFeature<
    "movdir64b", "HasMOVDIR64B", "true",
    "Support movdir64b instruction">;

def FeatureFastBEXTR : SubtargetFeature<
    "fast-bextr", "HasFastBEXTR", "true",
    "Indicates that the BEXTR instruction is implemented as a single uop "
    "with good throughput">;

// Fold vector math plus shuffles into horizontal math instructions when the
// CPU implements horizontal operations (introduced with SSE3) with better
// latency/throughput than the alternative sequence.
def FeatureFastHorizontalOps : SubtargetFeature<
    "fast-hops", "HasFastHorizontalOps", "true",
    "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
    "normal vector instructions with shuffles">;

// Shift-pair vs. shift+and preference, for scalar and vector code separately.
def FeatureFastScalarShiftMasks : SubtargetFeature<
    "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
    "Prefer a left/right scalar logical shift pair over a shift+and pair">;

def FeatureFastVectorShiftMasks : SubtargetFeature<
    "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
    "Prefer a left/right vector logical shift pair over a shift+and pair">;

def FeatureUseGLMDivSqrtCosts : SubtargetFeature<
    "use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
    "Use Goldmont specific floating point div/sqrt costs">;

// Enable use of alias analysis during code generation.
522def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true", 523 "Use alias analysis during codegen">; 524 525// Bonnell 526def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">; 527// Silvermont 528def ProcIntelSLM : SubtargetFeature<"", "X86ProcFamily", "IntelSLM", "">; 529 530//===----------------------------------------------------------------------===// 531// Register File Description 532//===----------------------------------------------------------------------===// 533 534include "X86RegisterInfo.td" 535include "X86RegisterBanks.td" 536 537//===----------------------------------------------------------------------===// 538// Instruction Descriptions 539//===----------------------------------------------------------------------===// 540 541include "X86Schedule.td" 542include "X86InstrInfo.td" 543include "X86SchedPredicates.td" 544 545def X86InstrInfo : InstrInfo; 546 547//===----------------------------------------------------------------------===// 548// X86 Scheduler Models 549//===----------------------------------------------------------------------===// 550 551include "X86ScheduleAtom.td" 552include "X86SchedSandyBridge.td" 553include "X86SchedHaswell.td" 554include "X86SchedBroadwell.td" 555include "X86ScheduleSLM.td" 556include "X86ScheduleZnver1.td" 557include "X86ScheduleZnver2.td" 558include "X86ScheduleBdVer2.td" 559include "X86ScheduleBtVer2.td" 560include "X86SchedSkylakeClient.td" 561include "X86SchedSkylakeServer.td" 562 563//===----------------------------------------------------------------------===// 564// X86 Processor Feature Lists 565//===----------------------------------------------------------------------===// 566 567def ProcessorFeatures { 568 // x86-64 and x86-64-v[234] 569 list<SubtargetFeature> X86_64V1Features = [ 570 FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2, 571 FeatureFXSR, FeatureNOPL, Feature64Bit 572 ]; 573 list<SubtargetFeature> X86_64V2Features = !listconcat( 574 
X86_64V1Features, 575 [FeatureCMPXCHG16B, FeatureLAHFSAHF, FeaturePOPCNT, FeatureSSE42]); 576 list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [ 577 FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT, 578 FeatureMOVBE, FeatureXSAVE 579 ]); 580 list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [ 581 FeatureBWI, 582 FeatureCDI, 583 FeatureDQI, 584 FeatureVLX, 585 ]); 586 587 // Nehalem 588 list<SubtargetFeature> NHMFeatures = X86_64V2Features; 589 list<SubtargetFeature> NHMTuning = [FeatureMacroFusion, 590 FeatureInsertVZEROUPPER]; 591 592 // Westmere 593 list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL]; 594 list<SubtargetFeature> WSMTuning = NHMTuning; 595 list<SubtargetFeature> WSMFeatures = 596 !listconcat(NHMFeatures, WSMAdditionalFeatures); 597 598 // Sandybridge 599 list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX, 600 FeatureXSAVE, 601 FeatureXSAVEOPT]; 602 list<SubtargetFeature> SNBTuning = [FeatureMacroFusion, 603 FeatureSlow3OpsLEA, 604 FeatureSlowDivide64, 605 FeatureSlowUAMem32, 606 FeatureFastScalarFSQRT, 607 FeatureFastSHLDRotate, 608 FeatureFast15ByteNOP, 609 FeaturePOPCNTFalseDeps, 610 FeatureInsertVZEROUPPER]; 611 list<SubtargetFeature> SNBFeatures = 612 !listconcat(WSMFeatures, SNBAdditionalFeatures); 613 614 // Ivybridge 615 list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND, 616 FeatureF16C, 617 FeatureFSGSBase]; 618 list<SubtargetFeature> IVBTuning = SNBTuning; 619 list<SubtargetFeature> IVBFeatures = 620 !listconcat(SNBFeatures, IVBAdditionalFeatures); 621 622 // Haswell 623 list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2, 624 FeatureBMI, 625 FeatureBMI2, 626 FeatureERMSB, 627 FeatureFMA, 628 FeatureINVPCID, 629 FeatureLZCNT, 630 FeatureMOVBE]; 631 list<SubtargetFeature> HSWTuning = [FeatureMacroFusion, 632 FeatureSlow3OpsLEA, 633 FeatureSlowDivide64, 634 FeatureFastScalarFSQRT, 635 FeatureFastSHLDRotate, 636 FeatureFast15ByteNOP, 
FeatureFastVariableShuffle, FeaturePOPCNTFalseDeps,
    FeatureLZCNTFalseDeps, FeatureInsertVZEROUPPER];
  list<SubtargetFeature> HSWFeatures =
    !listconcat(IVBFeatures, HSWAdditionalFeatures);

  // Naming convention for the per-CPU lists below:
  //   <CPU>AdditionalFeatures - ISA features added on top of the parent CPU.
  //   <CPU>Features           - complete ISA feature list (parent + additions).
  //   <CPU>Tuning             - tuning flags; kept separate from the ISA list.
  // Only ISA extensions belong in a *Features list; anything describing how
  // fast or slow an instruction is belongs in the matching *Tuning list.

  // Broadwell
  list<SubtargetFeature> BDWAdditionalFeatures = [
    FeatureADX, FeatureRDSEED, FeaturePRFCHW
  ];
  list<SubtargetFeature> BDWTuning = HSWTuning;
  list<SubtargetFeature> BDWFeatures =
    !listconcat(HSWFeatures, BDWAdditionalFeatures);

  // Skylake
  list<SubtargetFeature> SKLAdditionalFeatures = [
    FeatureAES, FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT, FeatureSGX
  ];
  list<SubtargetFeature> SKLTuning = [
    FeatureHasFastGather, FeatureMacroFusion, FeatureSlow3OpsLEA,
    FeatureSlowDivide64, FeatureFastScalarFSQRT, FeatureFastVectorFSQRT,
    FeatureFastSHLDRotate, FeatureFast15ByteNOP, FeatureFastVariableShuffle,
    FeaturePOPCNTFalseDeps, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> SKLFeatures =
    !listconcat(BDWFeatures, SKLAdditionalFeatures);

  // Skylake-AVX512
  // Built directly on BDWFeatures rather than SKLFeatures, so the shared
  // Skylake additions are repeated here (without SGX).
  list<SubtargetFeature> SKXAdditionalFeatures = [
    FeatureAES, FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT,
    FeatureAVX512, FeatureCDI, FeatureDQI, FeatureBWI, FeatureVLX,
    FeaturePKU, FeatureCLWB
  ];
  list<SubtargetFeature> SKXTuning = [
    FeatureHasFastGather, FeatureMacroFusion, FeatureSlow3OpsLEA,
    FeatureSlowDivide64, FeatureFastScalarFSQRT, FeatureFastVectorFSQRT,
    FeatureFastSHLDRotate, FeatureFast15ByteNOP, FeatureFastVariableShuffle,
    FeaturePrefer256Bit, FeaturePOPCNTFalseDeps, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> SKXFeatures =
    !listconcat(BDWFeatures, SKXAdditionalFeatures);

  // Cascadelake
  list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
  list<SubtargetFeature> CLXTuning = SKXTuning;
  list<SubtargetFeature> CLXFeatures =
    !listconcat(SKXFeatures, CLXAdditionalFeatures);

  // Cooperlake
  list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
  list<SubtargetFeature> CPXTuning = SKXTuning;
  list<SubtargetFeature> CPXFeatures =
    !listconcat(CLXFeatures, CPXAdditionalFeatures);

  // Cannonlake
  // Extends the Skylake client list (SKLFeatures), not the SKX one.
  list<SubtargetFeature> CNLAdditionalFeatures = [
    FeatureAVX512, FeatureCDI, FeatureDQI, FeatureBWI, FeatureVLX,
    FeaturePKU, FeatureVBMI, FeatureIFMA, FeatureSHA
  ];
  list<SubtargetFeature> CNLTuning = [
    FeatureHasFastGather, FeatureMacroFusion, FeatureSlow3OpsLEA,
    FeatureSlowDivide64, FeatureFastScalarFSQRT, FeatureFastVectorFSQRT,
    FeatureFastSHLDRotate, FeatureFast15ByteNOP, FeatureFastVariableShuffle,
    FeaturePrefer256Bit, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> CNLFeatures =
    !listconcat(SKLFeatures, CNLAdditionalFeatures);

  // Icelake
  list<SubtargetFeature> ICLAdditionalFeatures = [
    FeatureBITALG, FeatureVAES, FeatureVBMI2, FeatureVNNI,
    FeatureVPCLMULQDQ, FeatureVPOPCNTDQ, FeatureGFNI, FeatureCLWB,
    FeatureRDPID, FeatureFSRM
  ];
  list<SubtargetFeature> ICLTuning = CNLTuning;
  list<SubtargetFeature> ICLFeatures =
    !listconcat(CNLFeatures, ICLAdditionalFeatures);

  // Icelake Server
  list<SubtargetFeature> ICXAdditionalFeatures = [
    FeaturePCONFIG, FeatureWBNOINVD
  ];
  list<SubtargetFeature> ICXTuning = CNLTuning;
  list<SubtargetFeature> ICXFeatures =
    !listconcat(ICLFeatures, ICXAdditionalFeatures);

  // Tigerlake
  // Extends the Icelake client list (ICLFeatures).
  list<SubtargetFeature> TGLAdditionalFeatures = [
    FeatureVP2INTERSECT, FeatureMOVDIRI, FeatureMOVDIR64B, FeatureSHSTK
  ];
  list<SubtargetFeature> TGLTuning = CNLTuning;
  list<SubtargetFeature> TGLFeatures =
    !listconcat(ICLFeatures, TGLAdditionalFeatures);

  // Sapphirerapids
  // Extends the Icelake server list (ICXFeatures).
  list<SubtargetFeature> SPRAdditionalFeatures = [
    FeatureAMXTILE, FeatureAMXINT8, FeatureAMXBF16, FeatureBF16,
    FeatureSERIALIZE, FeatureCLDEMOTE, FeatureWAITPKG, FeaturePTWRITE,
    FeatureAVXVNNI, FeatureTSXLDTRK, FeatureENQCMD, FeatureSHSTK,
    FeatureVP2INTERSECT, FeatureMOVDIRI, FeatureMOVDIR64B, FeatureUINTR
  ];
  list<SubtargetFeature> SPRTuning = ICXTuning;
  list<SubtargetFeature> SPRFeatures =
    !listconcat(ICXFeatures, SPRAdditionalFeatures);

  // Alderlake
  // Extends the Skylake client list (SKLFeatures) and reuses its tuning.
  list<SubtargetFeature> ADLAdditionalFeatures = [
    FeatureAVXVNNI, FeatureCLDEMOTE, FeatureHRESET, FeaturePTWRITE,
    FeatureSERIALIZE, FeatureWAITPKG
  ];
  list<SubtargetFeature> ADLTuning = SKLTuning;
  list<SubtargetFeature> ADLFeatures =
    !listconcat(SKLFeatures, ADLAdditionalFeatures);

  // Atom
  list<SubtargetFeature> AtomFeatures = [
    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSSE3,
    FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B, FeatureMOVBE,
    FeatureLAHFSAHF
  ];
  list<SubtargetFeature> AtomTuning = [
    ProcIntelAtom, FeatureSlowUAMem16, FeatureLEAForSP, FeatureSlowDivide32,
    FeatureSlowDivide64, FeatureSlowTwoMemOps, FeatureLEAUsesAG,
    FeaturePadShortFunctions, FeatureInsertVZEROUPPER
  ];

  // Silvermont
  list<SubtargetFeature> SLMAdditionalFeatures = [
    FeatureSSE42, FeaturePOPCNT, FeaturePCLMUL, FeaturePRFCHW, FeatureRDRAND
  ];
  list<SubtargetFeature> SLMTuning = [
    ProcIntelSLM, FeatureSlowTwoMemOps, FeatureSlowLEA, FeatureSlowIncDec,
    FeatureSlowDivide64, FeatureSlowPMULLD, FeatureFast7ByteNOP,
    FeaturePOPCNTFalseDeps, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> SLMFeatures =
    !listconcat(AtomFeatures, SLMAdditionalFeatures);

  // Goldmont
  list<SubtargetFeature> GLMAdditionalFeatures = [
    FeatureAES, FeatureSHA, FeatureRDSEED, FeatureXSAVE, FeatureXSAVEOPT,
    FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT, FeatureFSGSBase
  ];
  list<SubtargetFeature> GLMTuning = [
    FeatureUseGLMDivSqrtCosts, FeatureSlowTwoMemOps, FeatureSlowLEA,
    FeatureSlowIncDec, FeaturePOPCNTFalseDeps, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> GLMFeatures =
    !listconcat(SLMFeatures, GLMAdditionalFeatures);

  // Goldmont Plus
  list<SubtargetFeature> GLPAdditionalFeatures = [
    FeaturePTWRITE, FeatureRDPID, FeatureSGX
  ];
  list<SubtargetFeature> GLPTuning = [
    FeatureUseGLMDivSqrtCosts, FeatureSlowTwoMemOps, FeatureSlowLEA,
    FeatureSlowIncDec, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> GLPFeatures =
    !listconcat(GLMFeatures, GLPAdditionalFeatures);

  // Tremont
  list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB, FeatureGFNI];
  list<SubtargetFeature> TRMTuning = GLPTuning;
  list<SubtargetFeature> TRMFeatures =
    !listconcat(GLPFeatures, TRMAdditionalFeatures);

  // Knights Landing
  list<SubtargetFeature> KNLFeatures = [
    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureFXSR,
    FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B, FeaturePOPCNT,
    FeaturePCLMUL, FeatureXSAVE, FeatureXSAVEOPT, FeatureLAHFSAHF,
    FeatureAES, FeatureRDRAND, FeatureF16C, FeatureFSGSBase, FeatureAVX512,
    FeatureERI, FeatureCDI, FeaturePFI, FeaturePREFETCHWT1, FeatureADX,
    FeatureRDSEED, FeatureMOVBE, FeatureLZCNT, FeatureBMI, FeatureBMI2,
    FeatureFMA, FeaturePRFCHW
  ];
  list<SubtargetFeature> KNLTuning = [
    FeatureSlowDivide64, FeatureSlow3OpsLEA, FeatureSlowIncDec,
    FeatureSlowTwoMemOps, FeaturePreferMaskRegisters, FeatureHasFastGather,
    FeatureSlowPMADDWD
  ];
  // KNM: the KNL list plus VPOPCNTDQ.
  // TODO Add AVX5124FMAPS/AVX5124VNNIW features
  list<SubtargetFeature> KNMFeatures =
    !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);

  // Barcelona
  list<SubtargetFeature> BarcelonaFeatures = [
    FeatureX87, FeatureCMPXCHG8B, FeatureSSE4A, Feature3DNowA, FeatureFXSR,
    FeatureNOPL, FeatureCMPXCHG16B, FeaturePRFCHW, FeatureLZCNT,
    FeaturePOPCNT, FeatureLAHFSAHF, FeatureCMOV, Feature64Bit
  ];
  list<SubtargetFeature> BarcelonaTuning = [
    FeatureFastScalarShiftMasks, FeatureSlowSHLD, FeatureInsertVZEROUPPER
  ];

  // Bobcat
  list<SubtargetFeature> BtVer1Features = [
    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSSE3,
    FeatureSSE4A, FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B,
    FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, FeatureLAHFSAHF
  ];
  list<SubtargetFeature> BtVer1Tuning = [
    FeatureFast15ByteNOP, FeatureFastScalarShiftMasks,
    FeatureFastVectorShiftMasks, FeatureSlowSHLD, FeatureInsertVZEROUPPER
  ];

  // Jaguar
  list<SubtargetFeature> BtVer2AdditionalFeatures = [
    FeatureAVX, FeatureAES, FeaturePCLMUL, FeatureBMI, FeatureF16C,
    FeatureMOVBE, FeatureXSAVE, FeatureXSAVEOPT
  ];
  list<SubtargetFeature> BtVer2Tuning = [
    FeatureFastLZCNT, FeatureFastBEXTR, FeatureFastHorizontalOps,
    FeatureFast15ByteNOP, FeatureFastScalarShiftMasks,
    FeatureFastVectorShiftMasks, FeatureSlowSHLD
  ];
  list<SubtargetFeature> BtVer2Features =
    !listconcat(BtVer1Features, BtVer2AdditionalFeatures);

  // Bulldozer
  list<SubtargetFeature> BdVer1Features = [
    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureXOP, Feature64Bit,
    FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureMMX,
    FeatureFXSR, FeatureNOPL, FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE,
    FeatureLWP, FeatureLAHFSAHF
  ];
  list<SubtargetFeature> BdVer1Tuning = [
    FeatureSlowSHLD, FeatureFast11ByteNOP, FeatureFastScalarShiftMasks,
    FeatureBranchFusion, FeatureInsertVZEROUPPER
  ];

  // Piledriver
  list<SubtargetFeature> BdVer2AdditionalFeatures = [
    FeatureF16C, FeatureBMI, FeatureTBM, FeatureFMA
  ];
  // FeatureFastBEXTR is a tuning flag, not an ISA extension, so it lives in
  // the tuning list (as it does in BtVer2Tuning and ZNTuning). Keeping it out
  // of the ISA list means it follows the tune CPU rather than the target CPU.
  // BdVer3Tuning and BdVer4Tuning pick it up through the chain below.
  list<SubtargetFeature> BdVer2AdditionalTuning = [FeatureFastBEXTR];
  list<SubtargetFeature> BdVer2Tuning =
    !listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
  list<SubtargetFeature> BdVer2Features =
    !listconcat(BdVer1Features, BdVer2AdditionalFeatures);

  // Steamroller
  list<SubtargetFeature> BdVer3AdditionalFeatures = [
    FeatureXSAVEOPT, FeatureFSGSBase
  ];
  list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning;
  list<SubtargetFeature> BdVer3Features =
    !listconcat(BdVer2Features, BdVer3AdditionalFeatures);

  // Excavator
  list<SubtargetFeature> BdVer4AdditionalFeatures = [
    FeatureAVX2, FeatureBMI2, FeatureMOVBE, FeatureRDRAND, FeatureMWAITX
  ];
  list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning;
  list<SubtargetFeature> BdVer4Features =
    !listconcat(BdVer3Features, BdVer4AdditionalFeatures);


  // AMD Zen Processors common ISAs
  list<SubtargetFeature> ZNFeatures = [
    FeatureADX, FeatureAES, FeatureAVX2, FeatureBMI, FeatureBMI2,
    FeatureCLFLUSHOPT, FeatureCLZERO, FeatureCMOV, Feature64Bit,
    FeatureCMPXCHG16B, FeatureF16C, FeatureFMA, FeatureFSGSBase,
    FeatureFXSR, FeatureNOPL, FeatureLAHFSAHF, FeatureLZCNT, FeatureMMX,
    FeatureMOVBE, FeatureMWAITX, FeaturePCLMUL, FeaturePOPCNT,
    FeaturePRFCHW, FeatureRDRAND, FeatureRDSEED, FeatureSHA, FeatureSSE4A,
    FeatureX87, FeatureXSAVE, FeatureXSAVEC, FeatureXSAVEOPT, FeatureXSAVES
  ];
  list<SubtargetFeature> ZNTuning = [
    FeatureFastLZCNT, FeatureFastBEXTR, FeatureFast15ByteNOP,
    FeatureBranchFusion, FeatureFastScalarShiftMasks, FeatureSlowSHLD,
    FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> ZN2AdditionalFeatures = [
    FeatureCLWB, FeatureRDPID, FeatureWBNOINVD
  ];
  list<SubtargetFeature> ZN2Tuning = ZNTuning;
list<SubtargetFeature> ZN2Features =
    !listconcat(ZNFeatures, ZN2AdditionalFeatures);

  // Third Zen list: the ZN2 feature list plus the additions below; tuning is
  // shared with ZNTuning.
  list<SubtargetFeature> ZN3AdditionalFeatures = [
    FeatureINVPCID, FeaturePKU, FeatureVAES, FeatureVPCLMULQDQ
  ];
  list<SubtargetFeature> ZN3Tuning = ZNTuning;
  list<SubtargetFeature> ZN3Features =
    !listconcat(ZN2Features, ZN3AdditionalFeatures);
}

//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//

// A processor that uses GenericModel as its scheduling model.
class Proc<string Name,
           list<SubtargetFeature> Features,
           list<SubtargetFeature> TuneFeatures>
  : ProcessorModel<Name, GenericModel, Features, TuneFeatures>;

// A processor with an explicitly chosen scheduling model.
class ProcModel<string Name,
                SchedMachineModel Model,
                list<SubtargetFeature> Features,
                list<SubtargetFeature> TuneFeatures>
  : ProcessorModel<Name, Model, Features, TuneFeatures>;

// NOTE: CMPXCHG8B is listed for legacy compatibility, so that it is only
// disabled when i386/i486 is specifically requested.
// NOTE: 64Bit is listed because "generic" is the default llc CPU. The
// X86Subtarget constructor checks that any CPU used in 64-bit mode has
// Feature64Bit enabled. It has no effect on code generation.
def : ProcModel<"generic", SandyBridgeModel,
                [FeatureX87, FeatureCMPXCHG8B, Feature64Bit],
                [FeatureSlow3OpsLEA, FeatureSlowDivide64, FeatureSlowIncDec,
                 FeatureMacroFusion, FeatureInsertVZEROUPPER]>;

// x87 only.
foreach P = ["i386", "i486"] in {
  def : Proc<P, [FeatureX87],
             [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}

// x87 plus CMPXCHG8B.
foreach P = ["i586", "pentium"] in {
  def : Proc<P, [FeatureX87, FeatureCMPXCHG8B],
             [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}

def : Proc<"pentium-mmx",
           [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
           [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;

def : Proc<"i686",
           [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV],
           [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;

def : Proc<"pentiumpro",
           [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureNOPL],
           [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;

def : Proc<"pentium2",
           [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureCMOV,
            FeatureFXSR, FeatureNOPL],
           [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;

foreach P = ["pentium3", "pentium3m"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE1,
              FeatureFXSR, FeatureNOPL, FeatureCMOV],
             [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}

// The PostRAScheduler is enabled for SSE2 and SSE3 class CPUs.
// The intent is to enable it for pentium4, the current default processor in
// a vanilla 32-bit clang compilation when no specific architecture is given.
// This generally gives a nice performance increase on silvermont, with
// largely neutral behavior on other contemporary large core processors.
// pentium-m, pentium4m, prescott and nocona are included as a preventative
// measure to avoid performance surprises, in case clang's default CPU
// changes slightly.

// pentium-m, pentium4 and pentium4m share the same model and lists.
foreach P = ["pentium-m", "pentium4", "pentium4m"] in {
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}

// Intel Quark.
def : Proc<"lakemont",
           [FeatureCMPXCHG8B],
           [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;

// Intel Core Duo.
def : ProcModel<"yonah", SandyBridgeModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
                 FeatureFXSR, FeatureNOPL, FeatureCMOV],
                [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;

// NetBurst.
def : ProcModel<"prescott", GenericPostRAModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
                 FeatureFXSR, FeatureNOPL, FeatureCMOV],
                [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
def : ProcModel<"nocona", GenericPostRAModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
                 FeatureSSE3, FeatureFXSR, FeatureNOPL, Feature64Bit,
                 FeatureCMPXCHG16B],
                [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;

// Intel Core 2 Solo/Duo.
def : ProcModel<"core2", SandyBridgeModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
                 FeatureSSSE3, FeatureFXSR, FeatureNOPL, Feature64Bit,
                 FeatureCMPXCHG16B, FeatureLAHFSAHF],
                [FeatureMacroFusion, FeatureSlowUAMem16,
                 FeatureInsertVZEROUPPER]>;
// Same lists as core2 except for SSE4.1 in place of SSSE3.
def : ProcModel<"penryn", SandyBridgeModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
                 FeatureSSE41, FeatureFXSR, FeatureNOPL, Feature64Bit,
                 FeatureCMPXCHG16B, FeatureLAHFSAHF],
                [FeatureMacroFusion, FeatureSlowUAMem16,
                 FeatureInsertVZEROUPPER]>;

// Atom CPUs.
foreach P = ["bonnell", "atom"] in {
  def : ProcModel<P, AtomModel,
                  ProcessorFeatures.AtomFeatures,
                  ProcessorFeatures.AtomTuning>;
}

foreach P = ["silvermont", "slm"] in {
  def : ProcModel<P, SLMModel,
                  ProcessorFeatures.SLMFeatures,
                  ProcessorFeatures.SLMTuning>;
}

// The entries below reuse SLMModel as their scheduling model.
def : ProcModel<"goldmont", SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.GLMTuning>;
def : ProcModel<"goldmont-plus", SLMModel,
                ProcessorFeatures.GLPFeatures,
                ProcessorFeatures.GLPTuning>;
def : ProcModel<"tremont", SLMModel,
                ProcessorFeatures.TRMFeatures,
                ProcessorFeatures.TRMTuning>;

// "Arrandale" along with corei3 and corei5
foreach P = ["nehalem", "corei7"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.NHMFeatures,
                  ProcessorFeatures.NHMTuning>;
}

// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
def : ProcModel<"westmere", SandyBridgeModel,
                ProcessorFeatures.WSMFeatures,
                ProcessorFeatures.WSMTuning>;

foreach P = ["sandybridge", "corei7-avx"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.SNBFeatures,
                  ProcessorFeatures.SNBTuning>;
}

foreach P = ["ivybridge", "core-avx-i"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.IVBFeatures,
                  ProcessorFeatures.IVBTuning>;
}

foreach P = ["haswell", "core-avx2"] in {
  def : ProcModel<P, HaswellModel,
                  ProcessorFeatures.HSWFeatures,
                  ProcessorFeatures.HSWTuning>;
}

def : ProcModel<"broadwell", BroadwellModel,
                ProcessorFeatures.BDWFeatures,
                ProcessorFeatures.BDWTuning>;

def : ProcModel<"skylake", SkylakeClientModel,
                ProcessorFeatures.SKLFeatures,
                ProcessorFeatures.SKLTuning>;

// FIXME: define KNL scheduler model
// Until then knl and knm use HaswellModel; knm also reuses KNLTuning.
def : ProcModel<"knl", HaswellModel,
                ProcessorFeatures.KNLFeatures,
                ProcessorFeatures.KNLTuning>;
def : ProcModel<"knm", HaswellModel,
                ProcessorFeatures.KNMFeatures,
                ProcessorFeatures.KNLTuning>;

foreach P = ["skylake-avx512", "skx"] in {
  def : ProcModel<P, SkylakeServerModel,
                  ProcessorFeatures.SKXFeatures,
                  ProcessorFeatures.SKXTuning>;
}

// The entries from cascadelake through sapphirerapids all use
// SkylakeServerModel.
def : ProcModel<"cascadelake", SkylakeServerModel,
                ProcessorFeatures.CLXFeatures,
                ProcessorFeatures.CLXTuning>;
def : ProcModel<"cooperlake", SkylakeServerModel,
                ProcessorFeatures.CPXFeatures,
                ProcessorFeatures.CPXTuning>;
def : ProcModel<"cannonlake", SkylakeServerModel,
                ProcessorFeatures.CNLFeatures,
                ProcessorFeatures.CNLTuning>;
def : ProcModel<"icelake-client", SkylakeServerModel,
                ProcessorFeatures.ICLFeatures,
                ProcessorFeatures.ICLTuning>;
def : ProcModel<"icelake-server", SkylakeServerModel,
                ProcessorFeatures.ICXFeatures,
                ProcessorFeatures.ICXTuning>;
def : ProcModel<"tigerlake", SkylakeServerModel,
                ProcessorFeatures.TGLFeatures,
                ProcessorFeatures.TGLTuning>;
def : ProcModel<"sapphirerapids", SkylakeServerModel,
                ProcessorFeatures.SPRFeatures,
                ProcessorFeatures.SPRTuning>;
def : ProcModel<"alderlake", SkylakeClientModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.ADLTuning>;

// AMD CPUs.

def : Proc<"k6",
           [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
           [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;

// k6-2 and k6-3 share identical lists.
foreach P = ["k6-2", "k6-3"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B, Feature3DNow],
             [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}

foreach P = ["athlon", "athlon-tbird"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, Feature3DNowA,
              FeatureNOPL],
             [FeatureSlowSHLD, FeatureSlowUAMem16,
              FeatureInsertVZEROUPPER]>;
}

foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureSSE1,
              Feature3DNowA, FeatureFXSR, FeatureNOPL],
             [FeatureSlowSHLD, FeatureSlowUAMem16,
              FeatureInsertVZEROUPPER]>;
}

foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B, FeatureSSE2, Feature3DNowA,
              FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMOV],
             [FeatureFastScalarShiftMasks, FeatureSlowSHLD,
              FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}

foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B, FeatureSSE3, Feature3DNowA,
              FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureCMOV,
              Feature64Bit],
             [FeatureFastScalarShiftMasks, FeatureSlowSHLD,
              FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}

foreach P = ["amdfam10", "barcelona"] in {
  def : Proc<P,
             ProcessorFeatures.BarcelonaFeatures,
             ProcessorFeatures.BarcelonaTuning>;
}

// Bobcat
def : Proc<"btver1",
           ProcessorFeatures.BtVer1Features,
           ProcessorFeatures.BtVer1Tuning>;
// Jaguar
def : ProcModel<"btver2", BtVer2Model,
                ProcessorFeatures.BtVer2Features,
                ProcessorFeatures.BtVer2Tuning>;

// Bulldozer (uses BdVer2Model)
def : ProcModel<"bdver1", BdVer2Model,
                ProcessorFeatures.BdVer1Features,
                ProcessorFeatures.BdVer1Tuning>;
// Piledriver
def : ProcModel<"bdver2", BdVer2Model,
                ProcessorFeatures.BdVer2Features,
                ProcessorFeatures.BdVer2Tuning>;
// Steamroller
def : Proc<"bdver3",
           ProcessorFeatures.BdVer3Features,
           ProcessorFeatures.BdVer3Tuning>;
// Excavator
def : Proc<"bdver4",
           ProcessorFeatures.BdVer4Features,
           ProcessorFeatures.BdVer4Tuning>;

// Zen family; znver3 uses Znver2Model.
def : ProcModel<"znver1", Znver1Model,
                ProcessorFeatures.ZNFeatures,
                ProcessorFeatures.ZNTuning>;
def : ProcModel<"znver2", Znver2Model,
                ProcessorFeatures.ZN2Features,
                ProcessorFeatures.ZN2Tuning>;
def : ProcModel<"znver3", Znver2Model,
                ProcessorFeatures.ZN3Features,
                ProcessorFeatures.ZN3Tuning>;

def : Proc<"geode",
           [FeatureX87, FeatureCMPXCHG8B, Feature3DNowA],
           [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;

def : Proc<"winchip-c6",
           [FeatureX87, FeatureMMX],
           [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
// winchip2 and c3 share identical lists.
foreach P = ["winchip2", "c3"] in {
  def : Proc<P,
             [FeatureX87, Feature3DNow],
             [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}
def : Proc<"c3-2",
           [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE1,
            FeatureFXSR, FeatureCMOV],
           [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;

// A generic 64-bit-specific x86 processor model is also provided. It tries to
// be good for modern chips without enabling instruction set encodings past
// the basic SSE2 and 64-bit ones. It disables slow things from any mainstream
// and modern 64-bit x86 chip, and enables features that are generally
// beneficial.
//
// The Sandy Bridge model is currently the default scheduling model, since it
// is used across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge, which covers
// a huge swath of x86 processors. If there are specific scheduling knobs
// which need to be tuned differently for AMD chips, we might consider forming
// a common base for them.
def : ProcModel<"x86-64", SandyBridgeModel,
                ProcessorFeatures.X86_64V1Features,
                [FeatureSlow3OpsLEA, FeatureSlowDivide64, FeatureSlowIncDec,
                 FeatureMacroFusion, FeatureInsertVZEROUPPER]>;

// x86-64 micro-architecture levels.
def : ProcModel<"x86-64-v2", SandyBridgeModel,
                ProcessorFeatures.X86_64V2Features,
                ProcessorFeatures.SNBTuning>;
// Close to Haswell.
def : ProcModel<"x86-64-v3", HaswellModel,
                ProcessorFeatures.X86_64V3Features,
                ProcessorFeatures.HSWTuning>;
// Close to the AVX-512 level implemented by Xeon Scalable Processors.
def : ProcModel<"x86-64-v4", HaswellModel,
                ProcessorFeatures.X86_64V4Features,
                ProcessorFeatures.SKXTuning>;

//===----------------------------------------------------------------------===//
// Calling Conventions
//===----------------------------------------------------------------------===//

include "X86CallingConv.td"


//===----------------------------------------------------------------------===//
// Assembly Parser
//===----------------------------------------------------------------------===//

// AT&T-syntax parser variant (variant index 0).
def ATTAsmParserVariant : AsmParserVariant {
  int Variant = 0;

  // Name of this variant.
  string Name = "att";

  // Comment delimiter: comments in assembly strings are discarded.
  string CommentDelimiter = "#";

  // Prefix used to recognize hard coded registers.
  string RegisterPrefix = "%";
}

// Intel-syntax parser variant (variant index 1).
def IntelAsmParserVariant : AsmParserVariant {
  int Variant = 1;

  // Name of this variant.
  string Name = "intel";

  // Comment delimiter: comments in assembly strings are discarded.
  string CommentDelimiter = ";";

  // Prefix used to recognize hard coded registers (empty for this variant).
  string RegisterPrefix = "";
}

//===----------------------------------------------------------------------===//
// Assembly Printers
//===----------------------------------------------------------------------===//

// The X86 target supports two different syntaxes for emitting machine code.
// The choice is controlled by the -x86-asm-syntax={att|intel} option.
def ATTAsmWriter : AsmWriter {
  string AsmWriterClassName = "ATTInstPrinter";
  int Variant = 0;
}
def IntelAsmWriter : AsmWriter {
  string AsmWriterClassName = "IntelInstPrinter";
  int Variant = 1;
}

// Top-level target record tying together the instruction set, both assembly
// parser variants and both assembly writers.
def X86 : Target {
  // Information about the instructions...
  let InstructionSet = X86InstrInfo;
  let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
  let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
  let AllowRegisterRenaming = 1;
}

//===----------------------------------------------------------------------===//
// Pfm Counters
//===----------------------------------------------------------------------===//

include "X86PfmCounters.td"