//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in this case numelts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for vector VT. For vector types it will be
  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
  // It is a little bit complex for scalar types, where NumElts = 1.
  // In this case we build v4f32 or v2f64
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns
  // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
  // due to load promotion during legalization
  PatFrag LdFrag = !cast<PatFrag>("load" #
                                  !if (!eq (TypeVariantName, "i"),
                                       !if (!eq (Size, 128), "v2i64",
                                       !if (!eq (Size, 256), "v4i64",
                                       !if (!eq (Size, 512), "v8i64",
                                       VTName))), VTName));

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
                                         !if (!eq (TypeVariantName, "i"),
                                              !if (!eq (Size, 128), "v2i64",
                                              !if (!eq (Size, 256), "v4i64",
                                              !if (!eq (Size, 512), "v8i64",
                                              VTName))), VTName));

  // Scalar load of a single element, e.g. loadf32 for f32.
  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  // Scalar-intrinsic memory pattern; only defined for f32/f64 elements.
  ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
                                         !cast<ComplexPattern>("sse_load_f32"),
                                         !if (!eq (EltTypeName, "f64"),
                                              !cast<ComplexPattern>("sse_load_f64"),
                                              ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format. This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  // Subregister index for extracting/inserting the xmm/ymm part of a wider
  // register; only defined for 128/256-bit vectors.
  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  // A vector type of the same width with element type i64. This is used to
  // create patterns for logic ops.
  ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");

  // A vector type of the same width with element type i32. This is used to
  // create the canonical constant zero node ImmAllZerosV.
  ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
  dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));

  // Instruction-name suffix encoding the vector length (Z128/Z256/Z).
  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}

def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows to use the same masking logic.
def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;

// Bundles the 512/256/128-bit VT infos for one element type so a single
// template argument can drive all three vector lengths (AVX512VL).
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;

// Mask-register analogue of X86VectorVTInfo: bundles the mask register
// class, its writemask variant, and the mask value type.
class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }

  // Zero mask does not add any restrictions to commute operands transformation.
  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                      "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}


// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 SDNode Select = vselect> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                             (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                         [(set _.RC:$dst,
                             (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                         "$src0 = $dst", IsCommutable, IsKCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDNode Select = vselect> :
  AVX512_maskable_common<O, F, _, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                         (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                         Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
// Scalar masking uses X86selects (selects on the low element) rather than
// the full-vector vselect.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS,
                                  bit IsCommutable = 0> :
  AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                  RHS, IsCommutable, 0, IsCommutable, X86selects>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDNode Select = vselect,
                                bit MaskOnly = 0> :
  AVX512_maskable_common<O, F, _, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         !if(MaskOnly, (null_frag), RHS),
                         (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                         Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
  AVX512_maskable_common<O, F, OutVT, Outs,
                         !con((ins InVT.RC:$src1), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                         (vselect InVT.KRCWM:$mask, RHS,
                                  (bitconvert InVT.RC:$src1)),
                         vselect, "", IsCommutable>;

// Scalar version of AVX512_maskable_3src: masking selects only the low
// element via X86selects.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns, string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       dag RHS,
                                       bit IsCommutable = 0,
                                       bit IsKCommutable = 0,
                                       bit MaskOnly = 0> :
  AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                       IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                       X86selects, MaskOnly>;

// Assembly-only variant: provides the masked/zero-masked asm forms but no
// masked ISel patterns (the unmasked Pattern is supplied by the caller).
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "$src0 = $dst">;

// Assembly-only variant of the 3src form: $src1 is the tied source, so no
// extra $src0 operand or constraint is needed.
multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "">;

// Instruction with mask that puts result in mask register,
// like "compare" and "vptest"
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      bit IsCommutable = 0> {
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                   "$dst, "#IntelSrcAsm#"}",
                     Pattern>;

  def NAME#k: AVX512<O, F, Outs, MaskingIns,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                   "$dst {${mask}}, "#IntelSrcAsm#"}",
                     MaskingPattern>, EVEX_K;
}

// Common base for compare-style instructions: results are written to the
// mask register class (KRC) instead of a vector register.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS,
                                      bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                             AttSrcAsm, IntelSrcAsm,
                             [(set _.KRC:$dst, RHS)],
                             [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

// Compare-style instruction where the masked form simply ANDs the result
// with the incoming mask.
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, bit IsCommutable = 0> :
  AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                             !con((ins _.KRCWM:$mask), Ins),
                             OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                             (and _.KRCWM:$mask, RHS), IsCommutable>;

// Assembly-only compare variant (no ISel patterns).
multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
                                   dag Outs, dag Ins, string OpcodeStr,
                                   string AttSrcAsm, string IntelSrcAsm> :
  AVX512_maskable_custom_cmp<O, F, Outs,
                             Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
                             AttSrcAsm, IntelSrcAsm, [], []>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskedRHS,
                                 bit IsCommutable = 0, SDNode Select = vselect> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                             (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
                         [(set _.RC:$dst,
                             (Select _.KRCWM:$mask, MaskedRHS,
                                     _.ImmAllZerosV))],
                         "$src0 = $dst", IsCommutable>;


// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                        [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                              [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                                [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                           (v16i32 immAllOnesV),
                                                           (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                                           (bc_v8i64 (v16i32 immAllOnesV)),
                                                           (bc_v8i64 (v16i32 immAllZerosV))))]>;
}

// 128/256-bit zeroing pseudos, same expansion strategy as AVX512_512_SET0.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                        [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
                        [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fpimm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                               (From.VT (bitconvert (From.LdFrag addr:$src2))),
                               (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                               (From.VT (bitconvert (From.LdFrag addr:$src2))),
                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, ReadAfterLd]>;
  }
}

// Passes the same pattern operator
// for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

// Codegen-only Pat<> helpers that map an insert of an alternative VT onto an
// already-defined VINSERT instruction (selected by InstrStr).
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To, PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (bitconvert (From.LdFrag addr:$src2))),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 null_frag, vinsert256_insert, sched>,
                                 EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen pattern with the alternative types,
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;


// Masked-insert patterns where the select VT (Cast) differs from the
// instruction's native To VT; the preserved input is bitconverted so the
// vselect operates in the Cast type.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  // Merge-masking, register source.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Merge-masking, memory source.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  // Zero-masking, register source.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masking, memory source.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          imm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    // Masked store-to-memory form; assembly only (no ISel pattern).
    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                         "\t{$idx, $src1, $dst {${mask}}|"
                         "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract,
                          vextract_extract, SchedRR, SchedMR>;

// Codegen pattern for the alternative types
// Maps an extract (and extract+store) of an alternative element type onto an
// already-defined instruction named InstrStr ("rr"/"mr" suffix appended).
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                         From.RC:$src1,
                         (EXTRACT_get_vextract_imm To.RC:$ext)))>;
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                             (iPTR imm))), addr:$dst),
              (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
               (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

// Instantiates the whole VEXTRACT family (32x4/64x4/32x8/64x2) for one
// element-type pair, gating the DQI-only forms behind their predicates.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 vextract256_extract, SchedRR, SchedMR>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  // (null_frag disables the unmasked pattern below.)
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract,
                                 SchedRR, SchedMR>,
                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract,
                                 SchedRR, SchedMR>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract,
                                 SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;


// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Without VLX the VEX VEXTRACT*128 instructions are used on the lower YMM.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// With VLX the 256-bit EVEX form is used instead.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}


// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
// Matches a masked extract whose result is bitcast to Cast.VT before the
// vselect, and selects the masked ("rrk") or zero-masked ("rrkz") form of
// the instruction named InstrStr.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
                      (ins VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
                      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
                      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                              addr:$dst)]>,
                      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;

//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// broadcast with a scalar argument.
// Selects a broadcast whose source is an FP scalar register (FRC) onto the
// already-defined vector-source instruction ("r"/"rk"/"rkz"), copying the
// scalar into the vector register class first.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                                   string Name,
                                   X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  // Unmasked broadcast from a scalar FP register.
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Merge-masked variant.
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Zero-masked variant.
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
             DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}

// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
// Register and memory broadcast forms. MaskInfo is the type the writemask is
// applied in; DestInfo is the type the broadcast node produces (they differ
// for the 32x2 broadcasts, per the comment above).
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     string Name,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast> {
  let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
  defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
                   T8PD, EVEX, Sched<[SchedRR]>;
  let mayLoad = 1 in
  defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (UnmaskedOp
                                   (SrcInfo.ScalarLdFrag addr:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (X86VBroadcast
                                   (SrcInfo.ScalarLdFrag addr:$src)))))>,
                   T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
                   Sched<[SchedRM]>;
  }

  // Fold "broadcast of scalar_to_vector(load)" into the memory form.
  def : Pat<(MaskInfo.VT
             (bitconvert
              (DestInfo.VT (UnmaskedOp
                            (SrcInfo.VT (scalar_to_vector
                                         (SrcInfo.ScalarLdFrag addr:$src))))))),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#m) addr:$src)>;
  // NOTE(review): this merge-masked pattern names the instruction via
  // DestInfo.ZSuffix while the unmasked and zero-masked patterns use
  // MaskInfo.ZSuffix — presumably equivalent whenever Mask/Dest share a
  // register width; confirm for the 32x2 instantiations.
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#mk)
             MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#mkz)
             MaskInfo.KRCWM:$mask, addr:$src)>;
}

// Helper class to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo> :
  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo>;

// vbroadcastsd: 512-bit always, 256-bit under VLX (no 128-bit form).
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128>,
             avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                     _.info128>,
                                     EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                        _.info128>,
                                        EVEX_V256;
  }
}

// vbroadcastss: additionally provides a 128-bit form under VLX.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128>,
             avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                     _.info128>,
                                     EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                        _.info128>,
                                        EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info128, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
                                        _.info128>,
                                        EVEX_V128;
  }
}
defm VBROADCASTSS :
                    avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>, VEX_W1X;

// Broadcast from a 32/64-bit GPR (vpbroadcastd/q with a register source).
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins SrcRC:$src),
                           "vpbroadcast"##_.Suffix, "$src", "$src",
                           (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
                           Sched<[SchedRR]>;
}

// Byte/word GPR broadcasts. The instruction takes GR32; the patterns widen
// the GR8/GR16 source via INSERT_SUBREG into an undef 32-bit register.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                         (outs _.RC:$dst), (ins GR32:$src),
                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                         "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
                         "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#r)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}

multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                        OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
// D and Q GPR broadcasts share opcode byte 0x7C; the Q variant carries VEX_W.
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;

// Provide aliases for broadcast from the same register class that
// automatically does the extract.
// Broadcast where the source is a whole YMM/ZMM register: extract the low
// XMM subregister and use the XMM-source broadcast instruction.
multiclass avx512_int_broadcast_rm_lowering<string Name,
                                            X86VectorVTInfo DestInfo,
                                            X86VectorVTInfo SrcInfo,
                                            X86VectorVTInfo ExtInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#"r")
             (ExtInfo.VT (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm)))>;
}

multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                   WriteShuffle256Ld, _.info512, _.info128>,
               avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info256, _.info128>,
                                  EVEX_V512;
    // Defined separately to avoid redefinition.
    defm Z_Alt : avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info512, _.info128>;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128>,
                avx512_int_broadcast_rm_lowering<NAME, _.info256, _.info256, _.info128>,
                                 EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128>,
                                 EVEX_V128;
  }
}

defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                                avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                                avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                                avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                                avx512vl_i64_info, HasAVX512>, VEX_W1X;

// Subvector broadcast from memory (vbroadcasti32x4 etc.), maskable.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}

// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (null_frag),
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}

let Predicates = [HasAVX512] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZm addr:$src)>;
}

let Predicates = [HasVLX] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
                    (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
                     (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                                  v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                                                  v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                                                  v8i64_info, v4i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                                                  v8f64_info, v4f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;

let Predicates = [HasAVX512] in {
// Alternative-type 256-bit subvector broadcasts mapped onto the 64x4 forms.
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// Register-source subvector broadcast: materialize via insert of the same
// YMM into both halves of a ZMM.
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4f64 VR256X:$src), 1)>;
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8f32 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4i64 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8i32 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v16i16 VR256X:$src), 1)>;
def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v32i8 VR256X:$src), 1)>;

// 128-bit subvector broadcasts to 512-bit, via the 32x4 instructions.
def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (bc_v16f32 (v16i32 immAllZerosV))),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   (bc_v8f64 (v16i32 immAllZerosV))),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
                   (bc_v8i64 (v16i32 immAllZerosV))),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
                   VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                                      v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                                                      v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4Z256rm
addr:$src)>; 1528def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))), 1529 (VBROADCASTI32X4Z256rm addr:$src)>; 1530def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))), 1531 (VBROADCASTI32X4Z256rm addr:$src)>; 1532def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))), 1533 (VBROADCASTI32X4Z256rm addr:$src)>; 1534 1535// Patterns for selects of bitcasted operations. 1536def : Pat<(vselect VK8WM:$mask, 1537 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), 1538 (bc_v8f32 (v8i32 immAllZerosV))), 1539 (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>; 1540def : Pat<(vselect VK8WM:$mask, 1541 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), 1542 VR256X:$src0), 1543 (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; 1544def : Pat<(vselect VK8WM:$mask, 1545 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), 1546 (v8i32 immAllZerosV)), 1547 (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>; 1548def : Pat<(vselect VK8WM:$mask, 1549 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), 1550 VR256X:$src0), 1551 (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; 1552 1553 1554// Provide fallback in case the load node that is used in the patterns above 1555// is used by additional users, which prevents the pattern selection. 
// Register-source fallback: broadcast a 128-bit register into a 256-bit
// register by inserting the same XMM source into both halves.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2f64 VR128X:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4f32 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2i64 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v16i8 VR128X:$src), 1)>;
}

let Predicates = [HasVLX, HasDQI] in {
// 256-bit 64x2 subvector broadcasts (DQI-only encodings).
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                                                         v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                                                         v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
// Masked 256-bit 64x2 broadcasts matched through vselect plus bitcasts.
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (bc_v4f64 (v8i32 immAllZerosV))),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   (bc_v4i64 (v8i32 immAllZerosV))),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI] in {
// 512-bit DQI-only subvector broadcasts: 64x2 (128-bit chunks) and
// 32x8 (256-bit chunks).
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                                                     v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                                                     v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                                                     v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                                                     v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
// Masked 512-bit X8/X2 broadcasts matched through vselect plus bitcasts.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   (bc_v16f32 (v16i32 immAllZerosV))),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (bc_v8f64 (v16i32 immAllZerosV))),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   (bc_v8i64 (v16i32 immAllZerosV))),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

// 32x2 element-pair broadcasts: 512-bit and 256-bit forms (DQI).
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info512,
                                          _Src.info512, _Src.info128, null_frag>,
                                          EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, null_frag>,
                                          EVEX_V256;
}

// Integer variant additionally has a 128-bit form.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst,
                                         AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, null_frag>,
                                          EVEX_V128;
}

defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                                     avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                                    avx512vl_f32_info, avx512vl_f64_info>;

// Broadcast from a full vector register: extract the low XMM and use the
// register-source VBROADCASTSS/SD.
let Predicates = [HasVLX] in {
def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZ256r (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;
def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZ256r (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
}

def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
          (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))>;
def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;

def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
          (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))>;
def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
// Single register-to-register form broadcasting a mask register into a vector.
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
                      EVEX, Sched<[WriteShuffle]>;
}

// Instantiates the 512-bit (CDI) and 256/128-bit (CDI+VLX) variants.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}

defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>, VEX_W;

//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
// $src1 is both the index operand and the destination (tied constraint); the
// masked forms use the index as the passthru, hence the _cast maskable helper.
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in {
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched]>;

  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                   (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
  }
}

// Broadcast-memory (embedded broadcast) form of VPERMI2.
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
    defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
              (ins _.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src2,
                     IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
              AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, ReadAfterLd]>;
}

// All vector widths; broadcast forms included (element types that support it).
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
            avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                             ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}

// Byte/word element variants: predicate-gated, no broadcast-memory form.
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;

// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
// Matches masked VPERMI2 where the passthru is the (bitcasted) index operand,
// for register, memory, and broadcast-memory source forms. CastVT is the type
// the index was bitcast from (commonly vXi64 due to ABI — see TODO below).
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (X86VPermt2 (_.VT _.RC:$src2),
                                       (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
                           (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, _.RC:$src3)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (X86VPermt2 _.RC:$src2,
                                       (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                                       (_.LdFrag addr:$src3)),
                           (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, addr:$src3)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (X86VPermt2 _.RC:$src2,
                                       (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                                       (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                           (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
                                                 _.RC:$src2, addr:$src3)>;
}

// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Only the PS (f32) variants need the vXi64-cast lowering patterns above.
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;

// VPERMT2
// Like VPERMI2 but $src1 (tied to $dst) is a data operand and $src2 is the
// index, so no cast helper is needed for the masked passthru.
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins IdxVT.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched]>;

  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                   (bitconvert (_.LdFrag addr:$src3)))), 1>,
            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
// Broadcast-memory (embedded broadcast) form of VPERMT2.
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
    defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src1,
                     IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
              AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, ReadAfterLd]>;
}

// All vector widths, broadcast forms included.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
            avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                             ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}

// Byte/word element variants: predicate-gated, no broadcast-memory form.
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;

//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//

// Register/memory forms of the mask-driven blend. No selection patterns
// ([] bodies): these are encoding/assembler definitions only.
multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
             EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  let mayLoad = 1 in {
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, ReadAfterLd]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, ReadAfterLd]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
  }
  }
}
// Embedded-broadcast memory forms of the mask-driven blend.
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
            (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
            !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
            EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded, ReadAfterLd]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
            (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
            !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
            EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
          (ins _.RC:$src1, _.ScalarMemOp:$src2),
          !strconcat(OpcodeStr,
          "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
          "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
          EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
          Sched<[sched.Folded, ReadAfterLd]>;
  }
}

// Dword/qword blends: all widths, with broadcast-memory forms.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

// Byte/word blends: BWI-gated, no broadcast-memory forms.
multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
             EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, VEX_W;

//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//

// avx512_cmp_scalar - AVX512 CMPSS and CMPSD

multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
                             X86FoldableSchedWrite sched> {
  // Intrinsic (vector-typed) forms with mask-register destination.
  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                      "vcmp${cc}"#_.Suffix,
                      "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              imm:$cc)>, EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in
  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
                    "vcmp${cc}"#_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                        imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[sched.Folded, ReadAfterLd]>;

  // {sae} (suppress-all-exceptions) register form.
  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                     "vcmp${cc}"#_.Suffix,
                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                     (OpNodeRnd (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                imm:$cc,
                                (i32 FROUND_NO_EXC))>,
                     EVEX_4V, EVEX_B, Sched<[sched]>;
  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    // NOTE(review): rri_alt writes VK1:$dst while the sibling forms use
    // _.KRC:$dst — verify this asymmetry is intentional.
    defm  rri_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                        (outs VK1:$dst),
                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V,
                        Sched<[sched]>, NotMemoryFoldable;
  let mayLoad = 1 in
    defm  rmi_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, $src2, $src1", "$src1, $src2, $cc">,
                        EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
                        Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;

    defm  rrb_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                       "vcmp"#_.Suffix,
                       "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">,
                       EVEX_4V, EVEX_B, Sched<[sched]>, NotMemoryFoldable;
  }// let isAsmParserOnly = 1, hasSideEffects = 0

  // Scalar (FRC) codegen-only forms used by isel patterns.
  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
                !strconcat("vcmp${cc}", _.Suffix,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          _.FRC:$src2,
                                          imm:$cc))]>,
                EVEX_4V, Sched<[sched]>;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
              (outs _.KRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
              !strconcat("vcmp${cc}", _.Suffix,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2),
                                        imm:$cc))]>,
              EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded, ReadAfterLd]>;
  }
}

let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}

// Packed integer compares producing a mask register; rrk/rmk forms AND the
// result into the writemask.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              bit IsCommutable> {
  let isCommutable = IsCommutable in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>,
             EVEX_4V, Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                       (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
             EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  let isCommutable = IsCommutable in
  def rrk : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmk : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (OpNode (_.VT _.RC:$src1),
                                             (_.VT (bitconvert
                                                    (_.LdFrag addr:$src2))))))]>,
              EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
}

// Adds the embedded-broadcast memory forms (dword/qword elements only).
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable> :
           avx512_icmp_packed<opc, OpcodeStr, OpNode, sched, _, IsCommutable> {
  def rmb : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                        (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
              EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr,
                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (OpNode (_.VT _.RC:$src1),
                                              (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src2)))))]>,
               EVEX_4V, EVEX_K, EVEX_B,
               Sched<[sched.Folded, ReadAfterLd]>;
}

// All-width instantiation (no broadcast forms).
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.ZMM,
                              VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

// All-width instantiation including broadcast forms.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     PatFrag OpNode, X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.ZMM,
                                  VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

// This fragment treats X86cmpm as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
                           (X86setcc_commute node:$src1, node:$src2, SETEQ)>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETGT)>;

// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c,
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c,
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c,
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c,
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}

// Integer compares with an explicit condition-code operand (VPCMP/VPCMPU).
// 'Frag' matches (setcc ...) with the wanted signedness; 'CommFrag' is the
// operand-swapped version used below to fold a load in the first operand.
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag CommFrag, X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                (_.VT _.RC:$src2),
                                                cond)))]>,
             EVEX_4V, Sched<[sched]>;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (_.KVT
                                (Frag:$cc
                                 (_.VT _.RC:$src1),
                                 (_.VT (bitconvert (_.LdFrag addr:$src2))),
                                 cond)))]>,
             EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                      AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                      (_.VT _.RC:$src2),
                                                      cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                      AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT
                                      (Frag:$cc
                                       (_.VT _.RC:$src1),
                                       (_.VT (bitconvert
                                              (_.LdFrag addr:$src2))),
                                       cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  // Assembler-only aliases (no patterns); NotMemoryFoldable keeps them out
  // of the memory-folding tables.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                          "$dst, $src1, $src2, $cc}"), []>,
               EVEX_4V, Sched<[sched]>, NotMemoryFoldable;
    let mayLoad = 1 in
    def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                          "$dst, $src1, $src2, $cc}"), []>,
               EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
    def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"), []>,
               EVEX_4V, EVEX_K, Sched<[sched]>, NotMemoryFoldable;
    let mayLoad = 1 in
    def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"), []>,
               EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>,
               NotMemoryFoldable;
  }

  // Fold a load appearing in the *first* operand by selecting the memory form
  // with the condition-code immediate swapped via CommFrag.OperandTransform.
  def : Pat<(_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
                   (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
                        (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}

// Adds embedded-broadcast forms (rmib/rmibk) on top of avx512_icmp_cc.
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                              PatFrag CommFrag, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> :
           avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched, _, Name> {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                      AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                         "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                        (_.VT _.RC:$src1),
                                        (X86VBroadcast
                                         (_.ScalarLdFrag addr:$src2)),
                                        cond)))]>,
              EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                      _.ScalarMemOp:$src2, AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag:$cc
                                             (_.VT _.RC:$src1),
                                             (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2)),
                                             cond))))]>,
              EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
    def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                          "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
               EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
               NotMemoryFoldable;
    def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
               EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
               NotMemoryFoldable;
  }

  // Fold a broadcast load in the first operand (swapped immediate, as above).
  def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                   (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag:$cc (X86VBroadcast
                                       (_.ScalarLdFrag addr:$src2)),
                        (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}

// 512-bit form under 'prd'; 128/256-bit forms additionally require AVX512VL.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag CommFrag, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.ZMM,
                          VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.YMM,
                               VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.XMM,
                               VTInfo.info128, NAME>, EVEX_V128;
  }
}

multiclass
avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                      PatFrag CommFrag, X86SchedWriteWidths sched,
                      AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.ZMM,
                              VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.YMM,
                                   VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.XMM,
                                   VTInfo.info128, NAME>, EVEX_V128;
  }
}

// Translate the ISD::CondCode operand of a setcc into the VPCMP immediate.
def X86pcmpm_imm : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

// Swapped operand version of the above.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  SSECC = X86::getSwappedVPCMPImm(SSECC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

// Matches only *signed* integer setcc conditions (VPCMP).
def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                       (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                               (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

// Matches only *unsigned* integer setcc conditions (VPCMPU).
def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                        (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_commute,
                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_commute,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                 EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_commute,
                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_commute,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                 VEX_W, EVEX_CD8<16, CD8VF>;

defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_commute,
                                    SchedWriteVecALU, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_commute,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_commute,
SchedWriteVecALU, avx512vl_i64_info,
                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_commute,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;

// FP compares with condition-code operand (VCMPPS/VCMPPD): register, memory,
// and embedded-broadcast forms, plus assembler-only explicit-immediate aliases.
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
  defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                  (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
                  "vcmp${cc}"#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (X86cmpm (_.VT _.RC:$src1),
                           (_.VT _.RC:$src2),
                           imm:$cc), 1>,
                  Sched<[sched]>;

  defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
                "vcmp${cc}"#_.Suffix,
                "$src2, $src1", "$src1, $src2",
                (X86cmpm (_.VT _.RC:$src1),
                         (_.VT (bitconvert (_.LdFrag addr:$src2))),
                         imm:$cc)>,
                Sched<[sched.Folded, ReadAfterLd]>;

  defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
                "vcmp${cc}"#_.Suffix,
                "${src2}"##_.BroadcastStr##", $src1",
                "$src1, ${src2}"##_.BroadcastStr,
                (X86cmpm (_.VT _.RC:$src1),
                        (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                        imm:$cc)>,
                EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                       "vcmp"#_.Suffix,
                       "$cc, $src2, $src1", "$src1, $src2, $cc">,
                       Sched<[sched]>, NotMemoryFoldable;

    let mayLoad = 1 in {
      defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc, $src2, $src1", "$src1, $src2, $cc">,
                         Sched<[sched.Folded, ReadAfterLd]>,
                         NotMemoryFoldable;

      defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc, ${src2}"##_.BroadcastStr##", $src1",
                         "$src1, ${src2}"##_.BroadcastStr##", $cc">,
                         EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
                         NotMemoryFoldable;
    }
  }

  // Patterns for selecting with loads in other operand.
  // CommutableCMPCC restricts this to condition codes that are safe to leave
  // unchanged when the operands are swapped.
  def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                     CommutableCMPCC:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
                                                      imm:$cc)>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
                                         (_.VT _.RC:$src1),
                                         CommutableCMPCC:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
                                                       _.RC:$src1, addr:$src2,
                                                       imm:$cc)>;

  def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                     (_.VT _.RC:$src1), CommutableCMPCC:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
                                                       imm:$cc)>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src2)),
                                         (_.VT _.RC:$src1),
                                         CommutableCMPCC:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
                                                        _.RC:$src1, addr:$src2,
                                                        imm:$cc)>;
}

// Register-register SAE form ({sae} suppresses FP exceptions), 512-bit only.
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // comparison code form (VCMP[EQ/LT/LE/...]
  defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                    (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                    "vcmp${cc}"#_.Suffix,
                    "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                    (X86cmpmRnd (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                imm:$cc,
                                (i32 FROUND_NO_EXC))>,
                    EVEX_B, Sched<[sched]>;

  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, {sae}, $src2, $src1",
                        "$src1, $src2, {sae}, $cc">,
                        EVEX_B, Sched<[sched]>, NotMemoryFoldable;
  }
}

// Per-width driver: SAE form exists only for the 512-bit variant.
multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
             avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;

  }
  let Predicates = [HasAVX512,HasVLX] in {
    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}

defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

// Patterns to select fp compares with load as first operand.
let Predicates = [HasAVX512] in {
  // Scalar compares with a load in the first operand: commute onto the
  // memory form (legal only for commutable condition codes).
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
}

// ----------------------------------------------------------------
// FPClass
//handle fpclass instruction  mask = op(reg_scalar,imm)
//                            op(mem_scalar,imm)
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
                                             (i32 imm:$src2)))]>,
                    Sched<[sched]>;
    // Writemask form: result ANDed with $mask.
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                           (OpNode (_.VT _.RC:$src1),
                                                   (i32 imm:$src2))))]>,
                     EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (OpNode _.ScalarIntMemCPat:$src1,
                                  (i32 imm:$src2)))]>,
                    Sched<[sched.Folded, ReadAfterLd]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix##
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                           (OpNode _.ScalarIntMemCPat:$src1,
                                                   (i32 imm:$src2))))]>,
                     EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
  }
}

//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
//                                  fpclass(reg_vec, mem_vec, imm)
//                                  fpclass(reg_vec, broadcast(eltVt), imm)
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem, string broadcast>{
  let ExeDomain = _.ExeDomain in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
                                             (i32 imm:$src2)))]>,
                    Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                           (OpNode (_.VT _.RC:$src1),
                                                   (i32 imm:$src2))))]>,
                     EVEX_K, Sched<[sched]>;
    // 'mem' suffix string ({x}/{y}/{z}) disambiguates the memory-operand width
    // in the mnemonic.
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##mem#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(OpNode
                                      (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                      (i32 imm:$src2)))]>,
                    Sched<[sched.Folded, ReadAfterLd]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix##mem#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
                                        (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                        (i32 imm:$src2))))]>,
                     EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
    def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                     _.BroadcastStr##", $dst|$dst, ${src1}"
                     ##_.BroadcastStr##", $src2}",
                     [(set _.KRC:$dst,(OpNode
                                       (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src1))),
                                       (i32 imm:$src2)))]>,
                     EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
    def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                      _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
                      _.BroadcastStr##", $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode
                                        (_.VT (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src1))),
                                        (i32 imm:$src2))))]>,
                      EVEX_B, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
  }
}

// Per-width expansion; the {x}/{y}/{z} strings become the memory-size suffix.
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, SDNode OpNode,
                                     X86SchedWriteWidths sched, Predicate prd,
                                     string broadcast>{
  let Predicates = [prd] in {
    defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.ZMM,
                                   _.info512, "{z}", broadcast>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.XMM,
                                      _.info128, "{x}", broadcast>, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.YMM,
                                      _.info256, "{y}", broadcast>, EVEX_V256;
  }
}

// Packed (PS/PD) + scalar (SS/SD) fpclass instantiation.
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, SDNode VecOpNode,
                                 SDNode ScalarOpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
                                      VecOpNode, sched, prd, "{l}">,
                                      EVEX_CD8<32, CD8VF>;
  defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
                                      VecOpNode, sched, prd, "{q}">,
                                      EVEX_CD8<64, CD8VF> , VEX_W;
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                   sched.Scl, f32x_info, prd>,
                                   EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                   sched.Scl, f64x_info, prd>,
                                   EVEX_CD8<64, CD8VT1>, VEX_W;
}

defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
                                      X86Vfpclasss, SchedWriteFCmp, HasDQI>,
                                      AVX512AIi8Base, EVEX;

//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC,
                           ValueType vvt, X86MemOperand x86memop> {
  // k<->k move has no pattern; isMoveReg lets the copy propagator treat it
  // as a register move.
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
             Sched<[WriteMove]>;
  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (vvt (load addr:$src)))]>,
             Sched<[WriteLoad]>;
  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(store KRC:$src, addr:$dst)]>,
             Sched<[WriteStore]>;
}

multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr,
                               RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
  }
}

let Predicates = [HasDQI] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;

let Predicates = [HasAVX512] in
  defm KMOVW :
avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;

let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}

// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;

def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

// zext of a k->GPR bitconvert can use KMOV directly (KMOVWrk zero-extends the
// 16 mask bits into the 32-bit GPR).
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;

def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;

def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;

// Load/store kreg
// Sub-byte mask vectors (v1i1/v2i1/v4i1) are moved through KMOVB, which
// requires DQI.
let Predicates = [HasDQI] in {
  def : Pat<(store VK1:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;

  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

let Predicates = [HasAVX512] in {
  // Without KMOVB, an i8 memory load is zero-extended through a GPR.
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
}

let Predicates = [HasAVX512] in {
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;

  // Zero-extended 1-bit insert: AND the GPR with 1 first so the upper mask
  // bits really are zero.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (COPY_TO_REGCLASS
             (KMOVWkr (AND32ri8
                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                       (i32 1))), VK16)>;
}

// Mask unary operation
// - KNOT
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst,
(OpNode KRC:$src))]>,
             Sched<[sched]>;
}

// Instantiates b/w/d/q widths; byte form needs DQI, dword/qword need BWI.
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, PS;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            sched, HasBWI>, VEX, PS, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;

// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

// Sub-byte masks are always promoted to 16-bit for KNOT.
def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;

// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             X86FoldableSchedWrite sched, Predicate prd,
                             bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
             Sched<[sched]>;
}

multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}

def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;

multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
                            Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway
  def : Pat<(OpNode VK1:$src1, VK1:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK1:$src1, VK16),
                               (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  // NOTE(review): the VK2/VK4 patterns below copy the result back to VK1
  // rather than VK2/VK4 — verify this is intentional.
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK2:$src1, VK16),
                               (COPY_TO_REGCLASS VK2:$src2, VK16)), VK1)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK4:$src1, VK16),
                               (COPY_TO_REGCLASS VK4:$src2, VK16)), VK1)>;
}

defm : avx512_binop_pat<and, and, KANDWrr>;
defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
defm : avx512_binop_pat<or, or, KORWrr>;
defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
defm : avx512_binop_pat<xor, xor, KXORWrr>;

// Mask unpacking
multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
                             RegisterClass KRCSrc, X86FoldableSchedWrite sched,
                             Predicate prd> {
  let
Predicates = [prd] in { 3053 let hasSideEffects = 0 in 3054 def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst), 3055 (ins KRC:$src1, KRC:$src2), 3056 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 3057 VEX_4V, VEX_L, Sched<[sched]>; 3058 3059 def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)), 3060 (!cast<Instruction>(NAME##rr) 3061 (COPY_TO_REGCLASS KRCSrc:$src2, KRC), 3062 (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>; 3063 } 3064} 3065 3066defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, WriteShuffle, HasAVX512>, PD; 3067defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, WriteShuffle, HasBWI>, PS; 3068defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, WriteShuffle, HasBWI>, PS, VEX_W; 3069 3070// Mask bit testing 3071multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC, 3072 SDNode OpNode, X86FoldableSchedWrite sched, 3073 Predicate prd> { 3074 let Predicates = [prd], Defs = [EFLAGS] in 3075 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2), 3076 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 3077 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>, 3078 Sched<[sched]>; 3079} 3080 3081multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode, 3082 X86FoldableSchedWrite sched, 3083 Predicate prdW = HasAVX512> { 3084 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>, 3085 VEX, PD; 3086 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>, 3087 VEX, PS; 3088 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>, 3089 VEX, PS, VEX_W; 3090 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>, 3091 VEX, PD, VEX_W; 3092} 3093 3094// TODO - do we need a X86SchedWriteWidths::KMASK type? 
3095defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>; 3096defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>; 3097 3098// Mask shift 3099multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC, 3100 SDNode OpNode, X86FoldableSchedWrite sched> { 3101 let Predicates = [HasAVX512] in 3102 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm), 3103 !strconcat(OpcodeStr, 3104 "\t{$imm, $src, $dst|$dst, $src, $imm}"), 3105 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>, 3106 Sched<[sched]>; 3107} 3108 3109multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr, 3110 SDNode OpNode, X86FoldableSchedWrite sched> { 3111 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode, 3112 sched>, VEX, TAPD, VEX_W; 3113 let Predicates = [HasDQI] in 3114 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode, 3115 sched>, VEX, TAPD; 3116 let Predicates = [HasBWI] in { 3117 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode, 3118 sched>, VEX, TAPD, VEX_W; 3119 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode, 3120 sched>, VEX, TAPD; 3121 } 3122} 3123 3124defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>; 3125defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>; 3126 3127// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. 
3128multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr, 3129 X86VectorVTInfo Narrow, 3130 X86VectorVTInfo Wide> { 3131 def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1), 3132 (Narrow.VT Narrow.RC:$src2))), 3133 (COPY_TO_REGCLASS 3134 (!cast<Instruction>(InstStr#"Zrr") 3135 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3136 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), 3137 Narrow.KRC)>; 3138 3139 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3140 (Frag (Narrow.VT Narrow.RC:$src1), 3141 (Narrow.VT Narrow.RC:$src2)))), 3142 (COPY_TO_REGCLASS 3143 (!cast<Instruction>(InstStr#"Zrrk") 3144 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3145 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3146 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), 3147 Narrow.KRC)>; 3148} 3149 3150// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. 
3151multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, 3152 string InstStr, 3153 X86VectorVTInfo Narrow, 3154 X86VectorVTInfo Wide> { 3155def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), 3156 (Narrow.VT Narrow.RC:$src2), cond)), 3157 (COPY_TO_REGCLASS 3158 (!cast<Instruction>(InstStr##Zrri) 3159 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3160 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3161 (Frag.OperandTransform $cc)), Narrow.KRC)>; 3162 3163def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3164 (Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), 3165 (Narrow.VT Narrow.RC:$src2), 3166 cond)))), 3167 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik) 3168 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3169 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3170 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3171 (Frag.OperandTransform $cc)), Narrow.KRC)>; 3172} 3173 3174// Same as above, but for fp types which don't use PatFrags. 
3175multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr, 3176 X86VectorVTInfo Narrow, 3177 X86VectorVTInfo Wide> { 3178def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), 3179 (Narrow.VT Narrow.RC:$src2), imm:$cc)), 3180 (COPY_TO_REGCLASS 3181 (!cast<Instruction>(InstStr##Zrri) 3182 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3183 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3184 imm:$cc), Narrow.KRC)>; 3185 3186def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3187 (OpNode (Narrow.VT Narrow.RC:$src1), 3188 (Narrow.VT Narrow.RC:$src2), imm:$cc))), 3189 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik) 3190 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3191 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3192 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3193 imm:$cc), Narrow.KRC)>; 3194} 3195 3196let Predicates = [HasAVX512, NoVLX] in { 3197 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't 3198 // increase the pattern complexity the way an immediate would. 
  let AddedComplexity = 2 in {
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v2i64x_info, v8i64_info>;
  }

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v8f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v4f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v4f64x_info, v8f64_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v2f64x_info, v8f64_info>;
}

// Byte/word element compares need BWI for the 512-bit instructions.
let Predicates = [HasBWI, NoVLX] in {
  // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
  // increase the pattern complexity the way an immediate would.
  let AddedComplexity = 2 in {
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v8i16x_info, v32i16_info>;
  }

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v8i16x_info, v32i16_info>;
}

// Mask setting all 0s or 1s
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT,
                             PatFrag Val> {
  // Pseudo that materializes an all-zeros / all-ones mask; rematerializable
  // and as cheap as a move so the register allocator can recreate it freely.
  let Predicates = [HasAVX512] in
  let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
      SchedRW = [WriteZero] in
  def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                 [(set KRC:$dst, (VT Val))]>;
}

multiclass avx512_mask_setop_w<PatFrag Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;

// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}

// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Both directions are pure register-class copies: the K registers already
// hold the bits, only the nominal type/class changes.
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;

//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//

// One width of an AVX-512 load: rr move, masked (rrk) and zero-masked (rrkz)
// register forms, plus rm/rmk/rmkz memory forms and masked-load patterns.
multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
  let isMoveReg = 1 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                              (_.VT _.RC:$src),
                                              _.ImmAllZerosV)))], _.ExeDomain>,
                      EVEX, EVEX_KZ, Sched<[Sched.RR]>;

  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    // NoRMPattern suppresses the load pattern (used when
                    // another instruction already owns this selection).
                    !if(NoRMPattern, [],
                        [(set _.RC:$dst,
                          (_.VT (bitconvert (ld_frag addr:$src))))]),
                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

  // Merge-masked forms tie $src0 to $dst (pass-through of unselected lanes).
  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                  "${dst} {${mask}}, $src1}"),
                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                               (_.VT _.RC:$src1),
                                               (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RR]>;
    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                  "${dst} {${mask}}, $src1}"),
                       [(set _.RC:$dst, (_.VT
                           (vselect _.KRCWM:$mask,
                            (_.VT (bitconvert (ld_frag addr:$src1))),
                            (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RM]>;
  }
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.MemOp:$src),
                      OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                      "${dst} {${mask}} {z}, $src}",
                      [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
                        (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
                      _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }
  // Masked loads with an undef or zero pass-through both map to the
  // zero-masked form; a register pass-through maps to the merge-masked form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}

// Instantiate aligned loads at 512 bits (base predicate) and 256/128 bits
// (additionally requires VLX).
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned512,
                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                          _.info256.AlignedLdFrag, masked_load_aligned256,
                          Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                          _.info128.AlignedLdFrag, masked_load_aligned128,
                          Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
  }
}

// Unaligned counterpart of avx512_alignedload_vl.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load_unaligned, Sched.ZMM, "",
                       NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                          masked_load_unaligned, Sched.YMM, EVEX2VEXOvrd#"Y",
                          NoRMPattern, SelectOprr>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                          masked_load_unaligned, Sched.XMM, EVEX2VEXOvrd,
                          NoRMPattern, SelectOprr>, EVEX_V128;
  }
}

multiclass avx512_store<bits<8> opc,
                        string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                        bit NoMRPattern = 0> {
  // Register-to-register "reversed" encodings (MRMDestReg) used only for
  // disassembly and for unfolding; hidden from codegen patterns.
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let isMoveReg = 1 in
  def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}",
                        [], _.ExeDomain>, EVEX,
                        FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
                        EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src),
                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
                         "${dst} {${mask}}, $src}",
                         [], _.ExeDomain>, EVEX, EVEX_K,
                         FoldGenData<BaseName#_.ZSuffix#rrk>,
                         Sched<[Sched.RR]>;
  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src),
                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                          "${dst} {${mask}} {z}, $src}",
                          [], _.ExeDomain>, EVEX, EVEX_KZ,
                          FoldGenData<BaseName#_.ZSuffix#rrkz>,
                          Sched<[Sched.RR]>;
  }

  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                     [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
                     NotMemoryFoldable;

  def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
                                                       _.KRCWM:$mask, _.RC:$src)>;

  // ".s" assembler aliases force the reversed (store-form) encoding.
  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                   _.RC:$dst, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}

// Instantiate unaligned stores at 512 bits (base predicate) and 256/128
// bits (additionally requires VLX).
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _, Predicate prd,
                            X86SchedWriteMoveLSWidths Sched,
                            string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store_unaligned, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store_unaligned, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store_unaligned, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}

// Aligned counterpart of avx512_store_vl.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned512, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned256, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned128, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}

defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps",
avx512vl_f32_info, 3530 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, 3531 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, 3532 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, 3533 PS, EVEX_CD8<32, CD8VF>; 3534 3535defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, 3536 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, 3537 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, 3538 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, 3539 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3540 3541defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512, 3542 SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>, 3543 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512, 3544 SchedWriteFMoveLS, "VMOVUPS">, 3545 PS, EVEX_CD8<32, CD8VF>; 3546 3547defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 3548 SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>, 3549 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512, 3550 SchedWriteFMoveLS, "VMOVUPD">, 3551 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3552 3553defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, 3554 HasAVX512, SchedWriteVecMoveLS, 3555 "VMOVDQA", 1>, 3556 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, 3557 HasAVX512, SchedWriteVecMoveLS, 3558 "VMOVDQA", 1>, 3559 PD, EVEX_CD8<32, CD8VF>; 3560 3561defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, 3562 HasAVX512, SchedWriteVecMoveLS, 3563 "VMOVDQA">, 3564 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, 3565 HasAVX512, SchedWriteVecMoveLS, 3566 "VMOVDQA">, 3567 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3568 3569defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 3570 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3571 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI, 3572 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3573 XD, EVEX_CD8<8, CD8VF>; 3574 3575defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3576 SchedWriteVecMoveLS, 
"VMOVDQU", 1>, 3577 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3578 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3579 XD, VEX_W, EVEX_CD8<16, CD8VF>; 3580 3581defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3582 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>, 3583 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3584 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3585 XS, EVEX_CD8<32, CD8VF>; 3586 3587defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3588 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>, 3589 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3590 SchedWriteVecMoveLS, "VMOVDQU">, 3591 XS, VEX_W, EVEX_CD8<64, CD8VF>; 3592 3593/* 3594// Special instructions to help with spilling when we don't have VLX. We need 3595// to load or store from a ZMM register instead. These are converted in 3596// expandPostRAPseudos. 3597let isReMaterializable = 1, canFoldAsLoad = 1, 3598 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in { 3599def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), 3600 "", []>, Sched<[WriteFLoadX]>; 3601def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), 3602 "", []>, Sched<[WriteFLoadY]>; 3603def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), 3604 "", []>, Sched<[WriteFLoadX]>; 3605def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), 3606 "", []>, Sched<[WriteFLoadY]>; 3607} 3608 3609let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { 3610def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), 3611 "", []>, Sched<[WriteFStoreX]>; 3612def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), 3613 "", []>, Sched<[WriteFStoreY]>; 3614def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), 3615 "", []>, Sched<[WriteFStoreX]>; 3616def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), 
(ins f256mem:$dst, VR256X:$src), 3617 "", []>, Sched<[WriteFStoreY]>; 3618} 3619*/ 3620 3621def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)), 3622 (v8i64 VR512:$src))), 3623 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), 3624 VK8), VR512:$src)>; 3625 3626def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), 3627 (v16i32 VR512:$src))), 3628 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; 3629 3630// These patterns exist to prevent the above patterns from introducing a second 3631// mask inversion when one already exists. 3632def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)), 3633 (bc_v8i64 (v16i32 immAllZerosV)), 3634 (v8i64 VR512:$src))), 3635 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>; 3636def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)), 3637 (v16i32 immAllZerosV), 3638 (v16i32 VR512:$src))), 3639 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>; 3640 3641multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow, 3642 X86VectorVTInfo Wide> { 3643 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask), 3644 Narrow.RC:$src1, Narrow.RC:$src0)), 3645 (EXTRACT_SUBREG 3646 (Wide.VT 3647 (!cast<Instruction>(InstrStr#"rrk") 3648 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)), 3649 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM), 3650 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))), 3651 Narrow.SubRegIdx)>; 3652 3653 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask), 3654 Narrow.RC:$src1, Narrow.ImmAllZerosV)), 3655 (EXTRACT_SUBREG 3656 (Wide.VT 3657 (!cast<Instruction>(InstrStr#"rrkz") 3658 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM), 3659 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))), 3660 Narrow.SubRegIdx)>; 3661} 3662 3663// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't 3664// available. Use a 512-bit operation and extract. 
let Predicates = [HasAVX512, NoVLX] in {
  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

// Byte/word element selects additionally need BWI for the 512-bit moves.
let Predicates = [HasBWI, NoVLX] in {
  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}

let Predicates = [HasAVX512] in {
  // 512-bit store.
  // All integer element widths use the 64-bit-element move instruction;
  // the element size only matters for masking, which plain stores don't use.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}

let Predicates = [HasVLX] in {
  // 128-bit store.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}

// Fold (select mask, (extract_subvector src, 0), passthru) into a masked
// move of the low subregister. Cast describes the type the select is
// performed in (may differ from To via bitconvert).
multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
                                   X86VectorVTInfo To, X86VectorVTInfo Cast> {
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (extract_subvector
                                       (From.VT From.RC:$src), (iPTR 0)))),
                              To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask,
                      (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;

  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (extract_subvector
                                       (From.VT From.RC:$src), (iPTR 0)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
Cast.KRCWM:$mask, 3749 (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>; 3750} 3751 3752 3753let Predicates = [HasVLX] in { 3754// A masked extract from the first 128-bits of a 256-bit vector can be 3755// implemented with masked move. 3756defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info, v2i64x_info, v2i64x_info>; 3757defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info, v4i32x_info, v2i64x_info>; 3758defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>; 3759defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info, v16i8x_info, v2i64x_info>; 3760defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info, v2i64x_info, v4i32x_info>; 3761defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info, v4i32x_info, v4i32x_info>; 3762defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>; 3763defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info, v16i8x_info, v4i32x_info>; 3764defm : masked_move_for_extract<"VMOVAPDZ128", v4f64x_info, v2f64x_info, v2f64x_info>; 3765defm : masked_move_for_extract<"VMOVAPDZ128", v8f32x_info, v4f32x_info, v2f64x_info>; 3766defm : masked_move_for_extract<"VMOVAPSZ128", v4f64x_info, v2f64x_info, v4f32x_info>; 3767defm : masked_move_for_extract<"VMOVAPSZ128", v8f32x_info, v4f32x_info, v4f32x_info>; 3768 3769// A masked extract from the first 128-bits of a 512-bit vector can be 3770// implemented with masked move. 
3771defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info, v2i64x_info, v2i64x_info>; 3772defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>; 3773defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>; 3774defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info, v16i8x_info, v2i64x_info>; 3775defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info, v2i64x_info, v4i32x_info>; 3776defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>; 3777defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>; 3778defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info, v16i8x_info, v4i32x_info>; 3779defm : masked_move_for_extract<"VMOVAPDZ128", v8f64_info, v2f64x_info, v2f64x_info>; 3780defm : masked_move_for_extract<"VMOVAPDZ128", v16f32_info, v4f32x_info, v2f64x_info>; 3781defm : masked_move_for_extract<"VMOVAPSZ128", v8f64_info, v2f64x_info, v4f32x_info>; 3782defm : masked_move_for_extract<"VMOVAPSZ128", v16f32_info, v4f32x_info, v4f32x_info>; 3783 3784// A masked extract from the first 256-bits of a 512-bit vector can be 3785// implemented with masked move. 
3786defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info, v4i64x_info, v4i64x_info>; 3787defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info, v4i64x_info>; 3788defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>; 3789defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info, v32i8x_info, v4i64x_info>; 3790defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info, v4i64x_info, v8i32x_info>; 3791defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info, v8i32x_info>; 3792defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>; 3793defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info, v32i8x_info, v8i32x_info>; 3794defm : masked_move_for_extract<"VMOVAPDZ256", v8f64_info, v4f64x_info, v4f64x_info>; 3795defm : masked_move_for_extract<"VMOVAPDZ256", v16f32_info, v8f32x_info, v4f64x_info>; 3796defm : masked_move_for_extract<"VMOVAPSZ256", v8f64_info, v4f64x_info, v8f32x_info>; 3797defm : masked_move_for_extract<"VMOVAPSZ256", v16f32_info, v8f32x_info, v8f32x_info>; 3798} 3799 3800// Move Int Doubleword to Packed Double Int 3801// 3802let ExeDomain = SSEPackedInt in { 3803def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), 3804 "vmovd\t{$src, $dst|$dst, $src}", 3805 [(set VR128X:$dst, 3806 (v4i32 (scalar_to_vector GR32:$src)))]>, 3807 EVEX, Sched<[WriteVecMoveFromGpr]>; 3808def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), 3809 "vmovd\t{$src, $dst|$dst, $src}", 3810 [(set VR128X:$dst, 3811 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, 3812 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>; 3813def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), 3814 "vmovq\t{$src, $dst|$dst, $src}", 3815 [(set VR128X:$dst, 3816 (v2i64 (scalar_to_vector GR64:$src)))]>, 3817 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 3818let isCodeGenOnly = 1, ForceDisassemble = 1, 
hasSideEffects = 0, mayLoad = 1 in 3819def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), 3820 (ins i64mem:$src), 3821 "vmovq\t{$src, $dst|$dst, $src}", []>, 3822 EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>; 3823let isCodeGenOnly = 1 in { 3824def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src), 3825 "vmovq\t{$src, $dst|$dst, $src}", 3826 [(set FR64X:$dst, (bitconvert GR64:$src))]>, 3827 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 3828def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src), 3829 "vmovq\t{$src, $dst|$dst, $src}", 3830 [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>, 3831 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>; 3832def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src), 3833 "vmovq\t{$src, $dst|$dst, $src}", 3834 [(set GR64:$dst, (bitconvert FR64X:$src))]>, 3835 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 3836def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src), 3837 "vmovq\t{$src, $dst|$dst, $src}", 3838 [(store (i64 (bitconvert FR64X:$src)), addr:$dst)]>, 3839 EVEX, VEX_W, Sched<[WriteVecStore]>, 3840 EVEX_CD8<64, CD8VT1>; 3841} 3842} // ExeDomain = SSEPackedInt 3843 3844// Move Int Doubleword to Single Scalar 3845// 3846let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 3847def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), 3848 "vmovd\t{$src, $dst|$dst, $src}", 3849 [(set FR32X:$dst, (bitconvert GR32:$src))]>, 3850 EVEX, Sched<[WriteVecMoveFromGpr]>; 3851 3852def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src), 3853 "vmovd\t{$src, $dst|$dst, $src}", 3854 [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))]>, 3855 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>; 3856} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 3857 3858// Move doubleword from xmm register to r/m32 3859// 3860let ExeDomain = SSEPackedInt in { 3861def 
VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), 3862 "vmovd\t{$src, $dst|$dst, $src}", 3863 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src), 3864 (iPTR 0)))]>, 3865 EVEX, Sched<[WriteVecMoveToGpr]>; 3866def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), 3867 (ins i32mem:$dst, VR128X:$src), 3868 "vmovd\t{$src, $dst|$dst, $src}", 3869 [(store (i32 (extractelt (v4i32 VR128X:$src), 3870 (iPTR 0))), addr:$dst)]>, 3871 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>; 3872} // ExeDomain = SSEPackedInt 3873 3874// Move quadword from xmm1 register to r/m64 3875// 3876let ExeDomain = SSEPackedInt in { 3877def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), 3878 "vmovq\t{$src, $dst|$dst, $src}", 3879 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), 3880 (iPTR 0)))]>, 3881 PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>, 3882 Requires<[HasAVX512]>; 3883 3884let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in 3885def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src), 3886 "vmovq\t{$src, $dst|$dst, $src}", []>, PD, 3887 EVEX, VEX_W, Sched<[WriteVecStore]>, 3888 Requires<[HasAVX512, In64BitMode]>; 3889 3890def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs), 3891 (ins i64mem:$dst, VR128X:$src), 3892 "vmovq\t{$src, $dst|$dst, $src}", 3893 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), 3894 addr:$dst)]>, 3895 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>, 3896 Sched<[WriteVecStore]>, Requires<[HasAVX512]>; 3897 3898let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in 3899def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst), 3900 (ins VR128X:$src), 3901 "vmovq\t{$src, $dst|$dst, $src}", []>, 3902 EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>; 3903} // ExeDomain = SSEPackedInt 3904 3905def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}", 3906 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>; 3907 3908// Move Scalar Single to Double Int 3909// 3910let 
ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 3911def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), 3912 (ins FR32X:$src), 3913 "vmovd\t{$src, $dst|$dst, $src}", 3914 [(set GR32:$dst, (bitconvert FR32X:$src))]>, 3915 EVEX, Sched<[WriteVecMoveToGpr]>; 3916def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), 3917 (ins i32mem:$dst, FR32X:$src), 3918 "vmovd\t{$src, $dst|$dst, $src}", 3919 [(store (i32 (bitconvert FR32X:$src)), addr:$dst)]>, 3920 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>; 3921} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 3922 3923// Move Quadword Int to Packed Quadword Int 3924// 3925let ExeDomain = SSEPackedInt in { 3926def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), 3927 (ins i64mem:$src), 3928 "vmovq\t{$src, $dst|$dst, $src}", 3929 [(set VR128X:$dst, 3930 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, 3931 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>; 3932} // ExeDomain = SSEPackedInt 3933 3934// Allow "vmovd" but print "vmovq". 
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;

//===----------------------------------------------------------------------===//
// AVX-512  MOVSS, MOVSD
//===----------------------------------------------------------------------===//

// Scalar move (vmovss/vmovsd): reg-reg merge forms (plain, masked-merge rrk,
// masked-zero rrkz), scalar load/store forms, and masked load/store forms.
// The plain rr form is restricted to OptForSize; elsewhere blends are
// preferred (see the OptForSpeed patterns later in this file).
multiclass avx512_move_scalar<string asm, SDNode OpNode,
                              X86VectorVTInfo _> {
  let Predicates = [HasAVX512, OptForSize] in
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, $src2}"),
              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                      _.ImmAllZerosV)))],
              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
             "$dst {${mask}}, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                     (_.VT _.RC:$src0))))],
             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  // Masked loads carry no patterns here; they are selected via the
  // avx512_load_scalar_lowering* multiclasses below.
  let mayLoad = 1, hasSideEffects = 0 in {
    let Constraints = "$src0 = $dst" in
    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}}|",
               "$dst {${mask}}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
                (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
                !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
                "$dst {${mask}} {z}, $src}"),
                [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
             EVEX, Sched<[WriteFStore]>;
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
              NotMemoryFoldable;
}

defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;


// Select a scalar X86selects feeding a scalar_to_vector+move as a single
// masked vmovss/vmovsd: merge form when the false arm is a register,
// zeroing form when it is the ZeroFP immediate.
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

def : Pat<(_.VT (OpNode _.RC:$src0,
                 (_.VT (scalar_to_vector
                        (_.EltVT (X86selects VK1WM:$mask,
                                  (_.EltVT _.FRC:$src1),
                                  (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#rrk)
            (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
            VK1WM:$mask,
            (_.VT _.RC:$src0),
            (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

def : Pat<(_.VT (OpNode _.RC:$src0,
                 (_.VT (scalar_to_vector
                        (_.EltVT (X86selects VK1WM:$mask,
                                  (_.EltVT _.FRC:$src1),
                                  (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#rrkz)
            VK1WM:$mask,
            (_.VT _.RC:$src0),
            (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}

// Select a 512-bit masked store of a 128-bit subvector insert as a masked
// scalar store (mrk); the scalar mask register is already VK1WM-compatible.
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store addr:$dst, Mask,
           (_.info512.VT (insert_subvector undef,
                          (_.info128.VT _.info128.RC:$src),
                          (iPTR 0)))),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
            (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
            (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}

// Same as above, but the GPR mask must first be widened to i32 via
// INSERT_SUBREG before it can be copied into the VK1WM mask class.
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store addr:$dst, Mask,
           (_.info512.VT (insert_subvector undef,
                          (_.info128.VT _.info128.RC:$src),
                          (iPTR 0)))),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
            (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
            (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}

// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked store directly. Codegen will widen 128-bit masked store to 512
// bits on AVX512F only targets.
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
                                                AVX512VLVectorVTInfo _,
                                                dag Mask512, dag Mask128,
                                                RegisterClass MaskRC,
                                                SubRegIndex subreg> {

// AVX512F pattern.
def : Pat<(masked_store addr:$dst, Mask512,
           (_.info512.VT (insert_subvector undef,
                          (_.info128.VT _.info128.RC:$src),
                          (iPTR 0)))),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
            (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
            (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

// AVX512VL pattern.
def : Pat<(masked_store addr:$dst, Mask128, (_.info128.VT _.info128.RC:$src)),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
            (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
            (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
}

// Select the low-subvector extract of a 512-bit masked load as a masked
// scalar load: rmkz for a zero passthru, rmk when merging with the vzmovl'd
// source register.
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

}

// Same as above, with the GPR mask widened to i32 via INSERT_SUBREG first.
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

}

// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked load directly. Codegen will widen 128-bit masked load to 512
// bits on AVX512F only targets.
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask512, dag Mask128,
                                               RegisterClass MaskRC,
                                               SubRegIndex subreg> {
// AVX512F patterns.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// AVX512Vl patterns.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (bitconvert (v4i32 immAllZerosV))))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;
}

defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

// The Mask dags below spell out the exact (and mask, 1) / truncation forms
// the DAG produces for a one-bit scalar predicate of each GPR width.
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

// Masked scalar-load lowerings, mirroring the store instantiations above.
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

// Scalar selects on FR32X/FR64X are implemented by bouncing through the
// 128-bit masked move; IMPLICIT_DEF is safe as $src0 because the mask
// fully determines the written element.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

// NOTE(review): the f32 zero pattern above uses fp32imm0 while this one uses
// the generic fpimm0 — presumably equivalent for f64 here; confirm.
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

// Reversed-operand (MRMDestReg) encodings of vmovss/vmovsd, emitted only for
// disassembly and for unfolding (FoldGenData names the canonical form).
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XS, EVEX_4V, VEX_LIG,
                           FoldGenData<"VMOVSSZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                  VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                    "$dst {${mask}} {z}, $src1, $src2}",
                         []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                         FoldGenData<"VMOVSSZrrkz">,
                         Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                           FoldGenData<"VMOVSDZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                  VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             VEX_W, FoldGenData<"VMOVSDZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                   VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                                         "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;
}

// ".s" assembler aliases select the reversed (store-form) encodings.
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;

// "Move low element and zero the rest" via VMOVSS/SD — only when optimizing
// for size; the OptForSpeed patterns below use blends instead.
let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
              (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
              (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
              (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
              (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))), sub_xmm)>;

}

// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
// Blend-based vzmovl patterns: VBLENDPS/PD and VPBLENDW are VEX-encoded, so
// only XMM0-15 are usable here, but the blends have better throughput.
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                    (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                    (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                    (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                    (i8 3))), sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VBLENDPDrri (v2f64 (V_SET0)),
                    (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)),
                    (i8 1))), sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VPBLENDWrri (v2i64 (V_SET0)),
                    (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)),
                    (i8 0xf))), sub_xmm)>;
}

let Predicates = [HasAVX512] in {

  // MOVSSrm zeros the high parts of the register; represent this
  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;

  // MOVSDrm zeros the high parts of the register; represent this
  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                    (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
                    (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v16f32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                   (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Extract and store.
  def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
}

// vmovq xmm,xmm: moves the low qword and zeroes the upper bits.
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                 (ins VR128X:$src),
                                 "vmovq\t{$src, $dst|$dst, $src}",
                                 [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                    (v2i64 VR128X:$src))))]>,
                                 EVEX, VEX_W;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                   (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>;

  def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
                   (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>;

  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzload addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                               (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                                (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//

// Non-temporal aligned load (vmovntdqa). No selection patterns here.
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                            (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                            [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                            EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                                 (ins i256mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                                 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                                 (ins i128mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                                 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}

// Non-temporal store of one vector width; st_frag defaults to the aligned
// non-temporal store fragment.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}

multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let
Predicates = [HasAVX512, HasVLX] in { 4572 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256; 4573 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128; 4574 } 4575} 4576 4577defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info, 4578 SchedWriteVecMoveLSNT>, PD; 4579defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info, 4580 SchedWriteFMoveLSNT>, PD, VEX_W; 4581defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info, 4582 SchedWriteFMoveLSNT>, PS; 4583 4584let Predicates = [HasAVX512], AddedComplexity = 400 in { 4585 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst), 4586 (VMOVNTDQZmr addr:$dst, VR512:$src)>; 4587 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst), 4588 (VMOVNTDQZmr addr:$dst, VR512:$src)>; 4589 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst), 4590 (VMOVNTDQZmr addr:$dst, VR512:$src)>; 4591 4592 def : Pat<(v8f64 (alignednontemporalload addr:$src)), 4593 (VMOVNTDQAZrm addr:$src)>; 4594 def : Pat<(v16f32 (alignednontemporalload addr:$src)), 4595 (VMOVNTDQAZrm addr:$src)>; 4596 def : Pat<(v8i64 (alignednontemporalload addr:$src)), 4597 (VMOVNTDQAZrm addr:$src)>; 4598} 4599 4600let Predicates = [HasVLX], AddedComplexity = 400 in { 4601 def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst), 4602 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>; 4603 def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst), 4604 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>; 4605 def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst), 4606 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>; 4607 4608 def : Pat<(v4f64 (alignednontemporalload addr:$src)), 4609 (VMOVNTDQAZ256rm addr:$src)>; 4610 def : Pat<(v8f32 (alignednontemporalload addr:$src)), 4611 (VMOVNTDQAZ256rm addr:$src)>; 4612 def : Pat<(v4i64 (alignednontemporalload addr:$src)), 4613 (VMOVNTDQAZ256rm addr:$src)>; 4614 4615 def : 
Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst), 4616 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>; 4617 def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst), 4618 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>; 4619 def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst), 4620 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>; 4621 4622 def : Pat<(v2f64 (alignednontemporalload addr:$src)), 4623 (VMOVNTDQAZ128rm addr:$src)>; 4624 def : Pat<(v4f32 (alignednontemporalload addr:$src)), 4625 (VMOVNTDQAZ128rm addr:$src)>; 4626 def : Pat<(v2i64 (alignednontemporalload addr:$src)), 4627 (VMOVNTDQAZ128rm addr:$src)>; 4628} 4629 4630//===----------------------------------------------------------------------===// 4631// AVX-512 - Integer arithmetic 4632// 4633multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 4634 X86VectorVTInfo _, X86FoldableSchedWrite sched, 4635 bit IsCommutable = 0> { 4636 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 4637 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 4638 "$src2, $src1", "$src1, $src2", 4639 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 4640 IsCommutable>, AVX512BIBase, EVEX_4V, 4641 Sched<[sched]>; 4642 4643 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 4644 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 4645 "$src2, $src1", "$src1, $src2", 4646 (_.VT (OpNode _.RC:$src1, 4647 (bitconvert (_.LdFrag addr:$src2))))>, 4648 AVX512BIBase, EVEX_4V, 4649 Sched<[sched.Folded, ReadAfterLd]>; 4650} 4651 4652multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, 4653 X86VectorVTInfo _, X86FoldableSchedWrite sched, 4654 bit IsCommutable = 0> : 4655 avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> { 4656 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 4657 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 4658 "${src2}"##_.BroadcastStr##", $src1", 4659 "$src1, ${src2}"##_.BroadcastStr, 4660 (_.VT (OpNode _.RC:$src1, 4661 (X86VBroadcast 
4662 (_.ScalarLdFrag addr:$src2))))>, 4663 AVX512BIBase, EVEX_4V, EVEX_B, 4664 Sched<[sched.Folded, ReadAfterLd]>; 4665} 4666 4667multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, 4668 AVX512VLVectorVTInfo VTInfo, 4669 X86SchedWriteWidths sched, Predicate prd, 4670 bit IsCommutable = 0> { 4671 let Predicates = [prd] in 4672 defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM, 4673 IsCommutable>, EVEX_V512; 4674 4675 let Predicates = [prd, HasVLX] in { 4676 defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, 4677 sched.YMM, IsCommutable>, EVEX_V256; 4678 defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, 4679 sched.XMM, IsCommutable>, EVEX_V128; 4680 } 4681} 4682 4683multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, 4684 AVX512VLVectorVTInfo VTInfo, 4685 X86SchedWriteWidths sched, Predicate prd, 4686 bit IsCommutable = 0> { 4687 let Predicates = [prd] in 4688 defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM, 4689 IsCommutable>, EVEX_V512; 4690 4691 let Predicates = [prd, HasVLX] in { 4692 defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, 4693 sched.YMM, IsCommutable>, EVEX_V256; 4694 defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, 4695 sched.XMM, IsCommutable>, EVEX_V128; 4696 } 4697} 4698 4699multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode, 4700 X86SchedWriteWidths sched, Predicate prd, 4701 bit IsCommutable = 0> { 4702 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info, 4703 sched, prd, IsCommutable>, 4704 VEX_W, EVEX_CD8<64, CD8VF>; 4705} 4706 4707multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode, 4708 X86SchedWriteWidths sched, Predicate prd, 4709 bit IsCommutable = 0> { 4710 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info, 4711 sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>; 4712} 4713 
// Element-size wrappers: word and byte (no broadcast form for sub-dword
// elements).
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}

multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}

// Combined dword+qword instantiation (appends "d"/"q" to the mnemonic).
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                IsCommutable>;
}

// Combined byte+word instantiation (appends "b"/"w" to the mnemonic).
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                IsCommutable>;
}

// All four element sizes: d/q under HasAVX512, b/w under HasBWI.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}

// Binop whose source and destination types differ; the broadcast form uses
// the separate _Brdct type (e.g. qword broadcast feeding a byte op).
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (bitconvert (_Src.LdFrag addr:$src2))))>,
                            AVX512BIBase, EVEX_4V,
                            Sched<[sched.Folded, ReadAfterLd]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_Brdct.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_Brdct.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                       (_Brdct.VT (X86VBroadcast
                                                   (_Brdct.ScalarLdFrag addr:$src2))))))>,
                             AVX512BIBase, EVEX_4V, EVEX_B,
                             Sched<[sched.Folded, ReadAfterLd]>;
}

defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
                                     SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;

// All three widths of an avx512_binop_rm2-style op; the broadcast type is
// always the i64 vector of the matching width.
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                 _SrcVTInfo.info512, _DstVTInfo.info512,
                                 v8i64_info, IsCommutable>,
                                 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}

defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8PD;

// Broadcast-memory form for the pack-style ops below.
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_Src.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_Src.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                       (_Src.VT (X86VBroadcast
                                                 (_Src.ScalarLdFrag addr:$src2))))))>,
                             EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                             Sched<[sched.Folded, ReadAfterLd]>;
}

// Register/register and register/memory forms for pack-style ops whose
// source and destination element types differ.
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (bitconvert (_Src.LdFrag addr:$src2))))>,
                            EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                            Sched<[sched.Folded, ReadAfterLd]>;
}

multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                                      EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                                      EVEX_V128;
  }
}

// i16->i8 packs have no broadcast form (byte elements).
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                                     EVEX_V128, VEX_WIG;
  }
}

multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}

defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                                avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                              avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;

defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// NOTE: this block was previously present twice verbatim; the redundant
// second copy has been removed.
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
             (VPMULLQZrr
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
             (VPMULLQZrr
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}

// Lower 128/256-bit i64 min/max through the 512-bit instruction by widening
// with IMPLICIT_DEF and extracting the low subregister.
multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
             (Instr
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
             (Instr
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}

let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
  defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
  defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
  defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
}

//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//

// OpNodeMsk is the OpNode to use when element size is important. OpNode will
// be set to null_frag for 32-bit elements.
multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
                           SDPatternOperator OpNode,
                           SDNode OpNodeMsk, X86FoldableSchedWrite sched,
                           X86VectorVTInfo _, bit IsCommutable = 0> {
  let hasSideEffects = 0 in
  defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                     (bitconvert (_.VT _.RC:$src2)))),
                    (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                                          _.RC:$src2)))),
                    IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[sched]>;

  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                     (bitconvert (_.LdFrag addr:$src2)))),
                    (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                     (bitconvert (_.LdFrag addr:$src2))))))>,
                    AVX512BIBase, EVEX_4V,
                    Sched<[sched.Folded, ReadAfterLd]>;
}

// OpNodeMsk is the OpNode to use where element size is important. So use
// for all of the broadcast patterns.
// Adds the embedded-broadcast (rmb, EVEX.b) memory form to avx512_logic_rm;
// both the unmasked and masked broadcast patterns use OpNodeMsk since the
// element size matters for broadcast.
multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            SDNode OpNodeMsk, X86FoldableSchedWrite sched, X86VectorVTInfo _,
                            bit IsCommutable = 0> :
           avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, sched, _,
                           IsCommutable> {
  defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr,
                  (_.i64VT (OpNodeMsk _.RC:$src1,
                           (bitconvert
                            (_.VT (X86VBroadcast
                                   (_.ScalarLdFrag addr:$src2)))))),
                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                     (bitconvert
                                      (_.VT (X86VBroadcast
                                             (_.ScalarLdFrag addr:$src2))))))))>,
                  AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[sched.Folded, ReadAfterLd]>;
}

// 512-bit (HasAVX512) plus 256/128-bit (VLX) instantiations of the logic op
// with broadcast support.
multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode,
                               SDNode OpNodeMsk, X86SchedWriteWidths sched,
                               AVX512VLVectorVTInfo VTInfo,
                               bit IsCommutable = 0> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.ZMM,
                            VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.YMM,
                                 VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.XMM,
                                 VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

// Dword+qword instantiation; the D variant passes null_frag for the plain
// OpNode (i64-typed pattern) and keeps OpNode only as the masked OpNodeMsk.
multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 bit IsCommutable = 0> {
  defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, sched,
                               avx512vl_i64_info, IsCommutable>,
                               VEX_W, EVEX_CD8<64, CD8VF>;
  defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, sched,
                               avx512vl_i32_info, IsCommutable>,
                               EVEX_CD8<32, CD8VF>;
}

defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, 1>;
defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, 1>;
defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, 1>;
defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic>;

//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
//===----------------------------------------------------------------------===//

// Scalar FP binop: masked intrinsic forms (rr_Int/rm_Int, current rounding
// mode) plus code-gen-only FRC register forms for plain fadd/fsub/etc.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2,
                                          (i32 FROUND_CURRENT)))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1,
                                          _.ScalarIntMemCPat:$src2,
                                          (i32 FROUND_CURRENT)))>,
                           Sched<[sched.Folded, ReadAfterLd]>;
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, ReadAfterLd]>;
  }
  }
}

// Adds the embedded-rounding form (rrb_Int, explicit $rc rounding control).
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched,
                                  bit IsCommutable = 0> {
  let ExeDomain = _.ExeDomain in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 imm:$rc)), IsCommutable>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}
// Like avx512_fp_scalar, but with a {sae} (suppress-all-exceptions) form
// instead of embedded rounding (used by min/max, which have no rounding).
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1,
                                          _.ScalarIntMemCPat:$src2))>,
                           Sched<[sched.Folded, ReadAfterLd]>;

  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, ReadAfterLd]>;
  }

  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                     (i32 FROUND_NO_EXC))>, EVEX_B,
                            Sched<[sched]>;
  }
}

// SS (f32) and SD (f64) instantiations of a rounding-capable scalar binop.
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode, X86SchedWriteSizes sched,
                                bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
                                    sched.PS.Scl, IsCommutable>,
                                    XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
                                    sched.PD.Scl, IsCommutable>,
                                    XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}

// SS/SD instantiations of a SAE-capable scalar binop (min/max).
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                                  VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
                                  XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                                  VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
                                  XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
                                 SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
                               SchedWriteFCmpSizes, 0>;

// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    X86FoldableSchedWrite sched> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, ReadAfterLd]>;
  }
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, XS, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<64, CD8VT1>;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, XS, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<64, CD8VT1>;

// Packed FP binop: maskable rr/rm forms plus broadcast (rmb) memory form.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable,
                            bit IsKZCommutable = IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable, 0,
                  IsKZCommutable>,
                  EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in {
    defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                    EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
    defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                     "${src2}"##_.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_.BroadcastStr,
                     (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                                (_.ScalarLdFrag addr:$src2))))>,
                     EVEX_4V, EVEX_B,
                     Sched<[sched.Folded, ReadAfterLd]>;
  }
  }
}

// Packed FP binop with embedded rounding control ($rc).
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>,
                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Packed FP binop with {sae} (suppress-all-exceptions).
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeRnd,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC)))>,
                  EVEX_4V, EVEX_B, Sched<[sched]>;
}

multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
Predicate prd, X86SchedWriteSizes sched, 5362 bit IsCommutable = 0, 5363 bit IsPD128Commutable = IsCommutable> { 5364 let Predicates = [prd] in { 5365 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info, 5366 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS, 5367 EVEX_CD8<32, CD8VF>; 5368 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info, 5369 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W, 5370 EVEX_CD8<64, CD8VF>; 5371 } 5372 5373 // Define only if AVX512VL feature is present. 5374 let Predicates = [prd, HasVLX] in { 5375 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info, 5376 sched.PS.XMM, IsCommutable>, EVEX_V128, PS, 5377 EVEX_CD8<32, CD8VF>; 5378 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info, 5379 sched.PS.YMM, IsCommutable>, EVEX_V256, PS, 5380 EVEX_CD8<32, CD8VF>; 5381 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info, 5382 sched.PD.XMM, IsPD128Commutable, 5383 IsCommutable>, EVEX_V128, PD, VEX_W, 5384 EVEX_CD8<64, CD8VF>; 5385 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info, 5386 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W, 5387 EVEX_CD8<64, CD8VF>; 5388 } 5389} 5390 5391multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, 5392 X86SchedWriteSizes sched> { 5393 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, 5394 v16f32_info>, 5395 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 5396 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, 5397 v8f64_info>, 5398 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; 5399} 5400 5401multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, 5402 X86SchedWriteSizes sched> { 5403 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, 5404 v16f32_info>, 5405 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 5406 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, 5407 v8f64_info>, 5408 EVEX_V512, PD, 
VEX_W,EVEX_CD8<64, CD8VF>; 5409} 5410 5411defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512, 5412 SchedWriteFAddSizes, 1>, 5413 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>; 5414defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512, 5415 SchedWriteFMulSizes, 1>, 5416 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>; 5417defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, 5418 SchedWriteFAddSizes>, 5419 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>; 5420defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, 5421 SchedWriteFDivSizes>, 5422 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>; 5423defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, 5424 SchedWriteFCmpSizes, 0>, 5425 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmpSizes>; 5426defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, 5427 SchedWriteFCmpSizes, 0>, 5428 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmpSizes>; 5429let isCodeGenOnly = 1 in { 5430 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, 5431 SchedWriteFCmpSizes, 1>; 5432 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, 5433 SchedWriteFCmpSizes, 1>; 5434} 5435defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, 5436 SchedWriteFLogicSizes, 1>; 5437defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, 5438 SchedWriteFLogicSizes, 0>; 5439defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI, 5440 SchedWriteFLogicSizes, 1>; 5441defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, 5442 SchedWriteFLogicSizes, 1>; 5443 5444// Patterns catch floating point selects with bitcasted integer logic ops. 5445multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode, 5446 X86VectorVTInfo _, Predicate prd> { 5447let Predicates = [prd] in { 5448 // Masked register-register logical operations. 
5449 def : Pat<(_.VT (vselect _.KRCWM:$mask, 5450 (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))), 5451 _.RC:$src0)), 5452 (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask, 5453 _.RC:$src1, _.RC:$src2)>; 5454 def : Pat<(_.VT (vselect _.KRCWM:$mask, 5455 (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))), 5456 _.ImmAllZerosV)), 5457 (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1, 5458 _.RC:$src2)>; 5459 // Masked register-memory logical operations. 5460 def : Pat<(_.VT (vselect _.KRCWM:$mask, 5461 (bitconvert (_.i64VT (OpNode _.RC:$src1, 5462 (load addr:$src2)))), 5463 _.RC:$src0)), 5464 (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask, 5465 _.RC:$src1, addr:$src2)>; 5466 def : Pat<(_.VT (vselect _.KRCWM:$mask, 5467 (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))), 5468 _.ImmAllZerosV)), 5469 (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1, 5470 addr:$src2)>; 5471 // Register-broadcast logical operations. 5472 def : Pat<(_.i64VT (OpNode _.RC:$src1, 5473 (bitconvert (_.VT (X86VBroadcast 5474 (_.ScalarLdFrag addr:$src2)))))), 5475 (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>; 5476 def : Pat<(_.VT (vselect _.KRCWM:$mask, 5477 (bitconvert 5478 (_.i64VT (OpNode _.RC:$src1, 5479 (bitconvert (_.VT 5480 (X86VBroadcast 5481 (_.ScalarLdFrag addr:$src2))))))), 5482 _.RC:$src0)), 5483 (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask, 5484 _.RC:$src1, addr:$src2)>; 5485 def : Pat<(_.VT (vselect _.KRCWM:$mask, 5486 (bitconvert 5487 (_.i64VT (OpNode _.RC:$src1, 5488 (bitconvert (_.VT 5489 (X86VBroadcast 5490 (_.ScalarLdFrag addr:$src2))))))), 5491 _.ImmAllZerosV)), 5492 (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask, 5493 _.RC:$src1, addr:$src2)>; 5494} 5495} 5496 5497multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> { 5498 defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>; 5499 defm : 
avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>; 5500 defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>; 5501 defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>; 5502 defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>; 5503 defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>; 5504} 5505 5506defm : avx512_fp_logical_lowering_sizes<"VPAND", and>; 5507defm : avx512_fp_logical_lowering_sizes<"VPOR", or>; 5508defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>; 5509defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>; 5510 5511let Predicates = [HasVLX,HasDQI] in { 5512 // Use packed logical operations for scalar ops. 5513 def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)), 5514 (COPY_TO_REGCLASS 5515 (v2f64 (VANDPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)), 5516 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))), 5517 FR64X)>; 5518 def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)), 5519 (COPY_TO_REGCLASS 5520 (v2f64 (VORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)), 5521 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))), 5522 FR64X)>; 5523 def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)), 5524 (COPY_TO_REGCLASS 5525 (v2f64 (VXORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)), 5526 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))), 5527 FR64X)>; 5528 def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)), 5529 (COPY_TO_REGCLASS 5530 (v2f64 (VANDNPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)), 5531 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))), 5532 FR64X)>; 5533 5534 def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)), 5535 (COPY_TO_REGCLASS 5536 (v4f32 (VANDPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)), 5537 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))), 5538 FR32X)>; 5539 def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)), 5540 (COPY_TO_REGCLASS 5541 (v4f32 (VORPSZ128rr (v4f32 
(COPY_TO_REGCLASS FR32X:$src1, VR128X)), 5542 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))), 5543 FR32X)>; 5544 def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)), 5545 (COPY_TO_REGCLASS 5546 (v4f32 (VXORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)), 5547 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))), 5548 FR32X)>; 5549 def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)), 5550 (COPY_TO_REGCLASS 5551 (v4f32 (VANDNPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)), 5552 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))), 5553 FR32X)>; 5554} 5555 5556multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode, 5557 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5558 let ExeDomain = _.ExeDomain in { 5559 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5560 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, 5561 "$src2, $src1", "$src1, $src2", 5562 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>, 5563 EVEX_4V, Sched<[sched]>; 5564 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5565 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix, 5566 "$src2, $src1", "$src1, $src2", 5567 (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>, 5568 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; 5569 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5570 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix, 5571 "${src2}"##_.BroadcastStr##", $src1", 5572 "$src1, ${src2}"##_.BroadcastStr, 5573 (OpNode _.RC:$src1, (_.VT (X86VBroadcast 5574 (_.ScalarLdFrag addr:$src2))), 5575 (i32 FROUND_CURRENT))>, 5576 EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; 5577 } 5578} 5579 5580multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, 5581 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5582 let ExeDomain = _.ExeDomain in { 5583 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5584 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, 5585 
"$src2, $src1", "$src1, $src2", 5586 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>, 5587 Sched<[sched]>; 5588 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 5589 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix, 5590 "$src2, $src1", "$src1, $src2", 5591 (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2, 5592 (i32 FROUND_CURRENT))>, 5593 Sched<[sched.Folded, ReadAfterLd]>; 5594 } 5595} 5596 5597multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, 5598 SDNode OpNode, SDNode OpNodeScal, 5599 X86SchedWriteWidths sched> { 5600 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>, 5601 avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>, 5602 EVEX_V512, EVEX_CD8<32, CD8VF>; 5603 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>, 5604 avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>, 5605 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; 5606 defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f32x_info>, 5607 avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, sched.Scl>, 5608 EVEX_4V,EVEX_CD8<32, CD8VT1>; 5609 defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f64x_info>, 5610 avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, sched.Scl>, 5611 EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 5612 5613 // Define only if AVX512VL feature is present. 
5614 let Predicates = [HasVLX] in { 5615 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v4f32x_info>, 5616 EVEX_V128, EVEX_CD8<32, CD8VF>; 5617 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v8f32x_info>, 5618 EVEX_V256, EVEX_CD8<32, CD8VF>; 5619 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v2f64x_info>, 5620 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; 5621 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v4f64x_info>, 5622 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; 5623 } 5624} 5625defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs, 5626 SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible; 5627 5628//===----------------------------------------------------------------------===// 5629// AVX-512 VPTESTM instructions 5630//===----------------------------------------------------------------------===// 5631 5632multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode, 5633 X86FoldableSchedWrite sched, X86VectorVTInfo _, 5634 string Name> { 5635 let ExeDomain = _.ExeDomain in { 5636 let isCommutable = 1 in 5637 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst), 5638 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5639 "$src2, $src1", "$src1, $src2", 5640 (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))), 5641 _.ImmAllZerosV)>, 5642 EVEX_4V, Sched<[sched]>; 5643 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), 5644 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 5645 "$src2, $src1", "$src1, $src2", 5646 (OpNode (bitconvert 5647 (_.i64VT (and _.RC:$src1, 5648 (bitconvert (_.LdFrag addr:$src2))))), 5649 _.ImmAllZerosV)>, 5650 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5651 Sched<[sched.Folded, ReadAfterLd]>; 5652 } 5653 5654 // Patterns for compare with 0 that just use the same source twice. 
5655 def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)), 5656 (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rr") 5657 _.RC:$src, _.RC:$src))>; 5658 5659 def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))), 5660 (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rrk") 5661 _.KRC:$mask, _.RC:$src, _.RC:$src))>; 5662} 5663 5664multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode, 5665 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5666 let ExeDomain = _.ExeDomain in 5667 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), 5668 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 5669 "${src2}"##_.BroadcastStr##", $src1", 5670 "$src1, ${src2}"##_.BroadcastStr, 5671 (OpNode (and _.RC:$src1, 5672 (X86VBroadcast 5673 (_.ScalarLdFrag addr:$src2))), 5674 _.ImmAllZerosV)>, 5675 EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5676 Sched<[sched.Folded, ReadAfterLd]>; 5677} 5678 5679// Use 512bit version to implement 128/256 bit in case NoVLX. 5680multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo, 5681 X86VectorVTInfo _, string Name> { 5682 def : Pat<(_.KVT (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))), 5683 _.ImmAllZerosV)), 5684 (_.KVT (COPY_TO_REGCLASS 5685 (!cast<Instruction>(Name # "Zrr") 5686 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 5687 _.RC:$src1, _.SubRegIdx), 5688 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 5689 _.RC:$src2, _.SubRegIdx)), 5690 _.KRC))>; 5691 5692 def : Pat<(_.KVT (and _.KRC:$mask, 5693 (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))), 5694 _.ImmAllZerosV))), 5695 (COPY_TO_REGCLASS 5696 (!cast<Instruction>(Name # "Zrrk") 5697 (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC), 5698 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 5699 _.RC:$src1, _.SubRegIdx), 5700 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 5701 _.RC:$src2, _.SubRegIdx)), 5702 _.KRC)>; 5703 5704 def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)), 5705 (_.KVT 
(COPY_TO_REGCLASS 5706 (!cast<Instruction>(Name # "Zrr") 5707 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 5708 _.RC:$src, _.SubRegIdx), 5709 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 5710 _.RC:$src, _.SubRegIdx)), 5711 _.KRC))>; 5712 5713 def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))), 5714 (COPY_TO_REGCLASS 5715 (!cast<Instruction>(Name # "Zrrk") 5716 (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC), 5717 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 5718 _.RC:$src, _.SubRegIdx), 5719 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 5720 _.RC:$src, _.SubRegIdx)), 5721 _.KRC)>; 5722} 5723 5724multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode, 5725 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 5726 let Predicates = [HasAVX512] in 5727 defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, NAME>, 5728 avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512; 5729 5730 let Predicates = [HasAVX512, HasVLX] in { 5731 defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched.YMM, _.info256, NAME>, 5732 avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256; 5733 defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched.XMM, _.info128, NAME>, 5734 avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128; 5735 } 5736 let Predicates = [HasAVX512, NoVLX] in { 5737 defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, NAME>; 5738 defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, NAME>; 5739 } 5740} 5741 5742multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode, 5743 X86SchedWriteWidths sched> { 5744 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, sched, 5745 avx512vl_i32_info>; 5746 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, sched, 5747 avx512vl_i64_info>, VEX_W; 5748} 5749 5750multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr, 5751 PatFrag OpNode, 
X86SchedWriteWidths sched> { 5752 let Predicates = [HasBWI] in { 5753 defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.ZMM, 5754 v32i16_info, NAME#"W">, EVEX_V512, VEX_W; 5755 defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.ZMM, 5756 v64i8_info, NAME#"B">, EVEX_V512; 5757 } 5758 let Predicates = [HasVLX, HasBWI] in { 5759 5760 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.YMM, 5761 v16i16x_info, NAME#"W">, EVEX_V256, VEX_W; 5762 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.XMM, 5763 v8i16x_info, NAME#"W">, EVEX_V128, VEX_W; 5764 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.YMM, 5765 v32i8x_info, NAME#"B">, EVEX_V256; 5766 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.XMM, 5767 v16i8x_info, NAME#"B">, EVEX_V128; 5768 } 5769 5770 let Predicates = [HasAVX512, NoVLX] in { 5771 defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, NAME#"B">; 5772 defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, NAME#"B">; 5773 defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, NAME#"W">; 5774 defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info, NAME#"W">; 5775 } 5776} 5777 5778// These patterns are used to match vptestm/vptestnm. We don't treat pcmpeqm 5779// as commutable here because we already canonicalized all zeros vectors to the 5780// RHS during lowering. 
5781def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2), 5782 (setcc node:$src1, node:$src2, SETEQ)>; 5783def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2), 5784 (setcc node:$src1, node:$src2, SETNE)>; 5785 5786multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr, 5787 PatFrag OpNode, X86SchedWriteWidths sched> : 5788 avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, sched>, 5789 avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, sched>; 5790 5791defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem, 5792 SchedWriteVecLogic>, T8PD; 5793defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm, 5794 SchedWriteVecLogic>, T8XS; 5795 5796//===----------------------------------------------------------------------===// 5797// AVX-512 Shift instructions 5798//===----------------------------------------------------------------------===// 5799 5800multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM, 5801 string OpcodeStr, SDNode OpNode, 5802 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5803 let ExeDomain = _.ExeDomain in { 5804 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst), 5805 (ins _.RC:$src1, u8imm:$src2), OpcodeStr, 5806 "$src2, $src1", "$src1, $src2", 5807 (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>, 5808 Sched<[sched]>; 5809 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), 5810 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr, 5811 "$src2, $src1", "$src1, $src2", 5812 (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), 5813 (i8 imm:$src2)))>, 5814 Sched<[sched.Folded]>; 5815 } 5816} 5817 5818multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM, 5819 string OpcodeStr, SDNode OpNode, 5820 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5821 let ExeDomain = _.ExeDomain in 5822 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), 5823 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr, 5824 "$src2, ${src1}"##_.BroadcastStr, 
"${src1}"##_.BroadcastStr##", $src2", 5825 (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>, 5826 EVEX_B, Sched<[sched.Folded]>; 5827} 5828 5829multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode, 5830 X86FoldableSchedWrite sched, ValueType SrcVT, 5831 PatFrag bc_frag, X86VectorVTInfo _> { 5832 // src2 is always 128-bit 5833 let ExeDomain = _.ExeDomain in { 5834 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5835 (ins _.RC:$src1, VR128X:$src2), OpcodeStr, 5836 "$src2, $src1", "$src1, $src2", 5837 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>, 5838 AVX512BIBase, EVEX_4V, Sched<[sched]>; 5839 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5840 (ins _.RC:$src1, i128mem:$src2), OpcodeStr, 5841 "$src2, $src1", "$src1, $src2", 5842 (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2))))>, 5843 AVX512BIBase, 5844 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; 5845 } 5846} 5847 5848multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 5849 X86SchedWriteWidths sched, ValueType SrcVT, 5850 PatFrag bc_frag, AVX512VLVectorVTInfo VTInfo, 5851 Predicate prd> { 5852 let Predicates = [prd] in 5853 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT, 5854 bc_frag, VTInfo.info512>, EVEX_V512, 5855 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ; 5856 let Predicates = [prd, HasVLX] in { 5857 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT, 5858 bc_frag, VTInfo.info256>, EVEX_V256, 5859 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>; 5860 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT, 5861 bc_frag, VTInfo.info128>, EVEX_V128, 5862 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>; 5863 } 5864} 5865 5866multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw, 5867 string OpcodeStr, SDNode OpNode, 5868 X86SchedWriteWidths sched, 5869 bit NotEVEX2VEXConvertibleQ = 0> { 5870 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", 
OpNode, sched, v4i32, 5871 bc_v4i32, avx512vl_i32_info, HasAVX512>; 5872 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in 5873 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64, 5874 bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W; 5875 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16, 5876 bc_v2i64, avx512vl_i16_info, HasBWI>; 5877} 5878 5879multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, 5880 string OpcodeStr, SDNode OpNode, 5881 X86SchedWriteWidths sched, 5882 AVX512VLVectorVTInfo VTInfo> { 5883 let Predicates = [HasAVX512] in 5884 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5885 sched.ZMM, VTInfo.info512>, 5886 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM, 5887 VTInfo.info512>, EVEX_V512; 5888 let Predicates = [HasAVX512, HasVLX] in { 5889 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5890 sched.YMM, VTInfo.info256>, 5891 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM, 5892 VTInfo.info256>, EVEX_V256; 5893 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5894 sched.XMM, VTInfo.info128>, 5895 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM, 5896 VTInfo.info128>, EVEX_V128; 5897 } 5898} 5899 5900multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM, 5901 string OpcodeStr, SDNode OpNode, 5902 X86SchedWriteWidths sched> { 5903 let Predicates = [HasBWI] in 5904 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5905 sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG; 5906 let Predicates = [HasVLX, HasBWI] in { 5907 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5908 sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG; 5909 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5910 sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG; 5911 } 5912} 5913 5914multiclass avx512_shift_rmi_dq<bits<8> opcd, 
bits<8> opcq, 5915 Format ImmFormR, Format ImmFormM, 5916 string OpcodeStr, SDNode OpNode, 5917 X86SchedWriteWidths sched, 5918 bit NotEVEX2VEXConvertibleQ = 0> { 5919 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode, 5920 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 5921 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in 5922 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode, 5923 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; 5924} 5925 5926defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli, 5927 SchedWriteVecShiftImm>, 5928 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli, 5929 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5930 5931defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli, 5932 SchedWriteVecShiftImm>, 5933 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli, 5934 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5935 5936defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai, 5937 SchedWriteVecShiftImm, 1>, 5938 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai, 5939 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5940 5941defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri, 5942 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5943defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli, 5944 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5945 5946defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, 5947 SchedWriteVecShift>; 5948defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, 5949 SchedWriteVecShift, 1>; 5950defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, 5951 SchedWriteVecShift>; 5952 5953// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX. 
// When VLX is unavailable, widen the 128/256-bit operand into a ZMM register
// with INSERT_SUBREG, run the 512-bit VPSRAQ, and extract the low subvector.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 VR128X:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 VR128X:$src2)), sub_xmm)>;

  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 imm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 imm:$src2)), sub_xmm)>;
}

//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//

// Per-element variable shift: register-register (rr) and register-memory (rm)
// forms, with masking provided by AVX512_maskable.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1,
                   (_.VT (bitconvert (_.LdFrag addr:$src2)))))>,
                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, ReadAfterLd]>;
  }
}

// Embedded-broadcast (rmb) form of the variable shift: the memory operand is a
// scalar that is broadcast to every element before the shift.
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                       "${src2}"##_.BroadcastStr##", $src1",
                       "$src1, ${src2}"##_.BroadcastStr,
                   (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                                (_.ScalarLdFrag addr:$src2)))))>,
                   AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, ReadAfterLd]>;
}

// Instantiate the 512-bit form under HasAVX512 and the 128/256-bit forms
// under HasVLX.
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
    defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
  }
}

// Dword and qword element-type variants of a variable shift.
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
                                  avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
                                  avx512vl_i64_info>, VEX_W;
}

// Use 512bit version to implement 128/256 bit in case NoVLX.
// Lower a 128/256-bit variable-shift node with the 512-bit instruction when
// VLX is not available: widen both operands into ZMM, shift, extract.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                  (_.info256.VT _.info256.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                  (_.info128.VT _.info128.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  }
}
// Word-element variable shifts, gated on BWI (and VLX for the narrow widths).
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
              EVEX_V512, VEX_W;
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
              EVEX_V256, VEX_W;
  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
              EVEX_V128, VEX_W;
  }
}

defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x12, "vpsllvw", shl, SchedWriteVarVecShift>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x11, "vpsravw", sra, SchedWriteVarVecShift>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x10, "vpsrlvw", srl, SchedWriteVarVecShift>;

defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

// Widening lowerings for narrow types that lack a native instruction without
// VLX (or, for VPSRAVQ, lack any VEX encoding).
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;

// Special handling for the VPSRAV intrinsics: select the instruction directly
// from the X86vsrav node, including masked (rrk/rmk) and zero-masked
// (rrkz/rmkz) forms via vselect.
multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
                                         list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
               _.RC:$src2)>;
    def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
               _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
                     _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
               _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
                     _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
               _.RC:$src1, addr:$src2)>;
  }
}

// Same as above, plus the embedded-broadcast (rmb/rmbk/rmbkz) memory forms
// for element types that support broadcast.
multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
                                            list<Predicate> p> :
           avx512_var_shift_int_lowering<InstrStr, _, p> {
  let Predicates = p in {
    def : Pat<(_.VT (X86vsrav _.RC:$src1,
                     (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
               _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                     _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                     _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
               _.RC:$src1, addr:$src2)>;
  }
}

defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;

// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Without VLX, implement narrow rotate-left via the 512-bit VPROLV*/VPROL*
// instructions: widen the operands into ZMM, rotate, extract the subvector.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                               sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                              sub_ymm)>;

  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                               sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                              sub_ymm)>;

  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 imm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 imm:$src2)), sub_ymm)>;
}

// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                               sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                              sub_ymm)>;

  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                               sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                              sub_ymm)>;

  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 imm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 imm:$src2)), sub_ymm)>;
}

//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/D/Q
//===-------------------------------------------------------------------===//

// Variable permute for d/q element types: 512-bit and 256-bit only (reuses
// the var-shift multiclasses since the instruction shape is identical).
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
}

// Immediate permute (VPERMQ/VPERMPD $imm forms): 512-bit and 256-bit only.
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                 string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info256>, EVEX_V256;
}

// Byte/word variable permute, gated on the given predicate (BWI or VBMI).
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                           Predicate prd, SDNode OpNode,
                           X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
              EVEX_V512 ;
  let Predicates = [HasVLX, prd] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              EVEX_V256 ;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
              EVEX_V128 ;
  }
}

defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i8_info>;

defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;

defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                                     X86VPermi, WriteShuffle256, avx512vl_i64_info>,
                                     EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                                      X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
                                      EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//

// In-lane FP permute with a vector control operand: rr, rm and broadcast rmb
// forms. The control vector (Ctrl) is an integer type matching the data type.
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                               (Ctrl.VT Ctrl.RC:$src2)))>,
                  T8PD, EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode
                           _.RC:$src1,
                           (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>,
                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, ReadAfterLd]>;
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (_.VT (OpNode
                            _.RC:$src1,
                            (Ctrl.VT (X86VBroadcast
                                       (Ctrl.ScalarLdFrag addr:$src2)))))>,
                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, ReadAfterLd]>;
}

// Instantiate the variable-control VPERMIL at all three vector widths.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                                  _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                  _.info128, Ctrl.info128>, EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                  _.info256, Ctrl.info256>, EVEX_V256;
  }
}

// Both VPERMIL encodings: variable control (OpcVar) and immediate (OpcImm,
// which reuses the immediate-shift multiclass shape).
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
             EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}

let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;
6385//===----------------------------------------------------------------------===// 6386// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW 6387//===----------------------------------------------------------------------===// 6388 6389defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd", 6390 X86PShufd, SchedWriteShuffle, avx512vl_i32_info>, 6391 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>; 6392defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw", 6393 X86PShufhw, SchedWriteShuffle>, 6394 EVEX, AVX512XSIi8Base; 6395defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw", 6396 X86PShuflw, SchedWriteShuffle>, 6397 EVEX, AVX512XDIi8Base; 6398 6399//===----------------------------------------------------------------------===// 6400// AVX-512 - VPSHUFB 6401//===----------------------------------------------------------------------===// 6402 6403multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6404 X86SchedWriteWidths sched> { 6405 let Predicates = [HasBWI] in 6406 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>, 6407 EVEX_V512; 6408 6409 let Predicates = [HasVLX, HasBWI] in { 6410 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>, 6411 EVEX_V256; 6412 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>, 6413 EVEX_V128; 6414 } 6415} 6416 6417defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, 6418 SchedWriteVarShuffle>, VEX_WIG; 6419 6420//===----------------------------------------------------------------------===// 6421// Move Low to High and High to Low packed FP Instructions 6422//===----------------------------------------------------------------------===// 6423 6424def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst), 6425 (ins VR128X:$src1, VR128X:$src2), 6426 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 6427 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>, 6428 
Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V; 6429let isCommutable = 1 in 6430def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst), 6431 (ins VR128X:$src1, VR128X:$src2), 6432 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 6433 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>, 6434 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable; 6435 6436//===----------------------------------------------------------------------===// 6437// VMOVHPS/PD VMOVLPS Instructions 6438// All patterns was taken from SSS implementation. 6439//===----------------------------------------------------------------------===// 6440 6441multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, 6442 SDPatternOperator OpNode, 6443 X86VectorVTInfo _> { 6444 let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in 6445 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst), 6446 (ins _.RC:$src1, f64mem:$src2), 6447 !strconcat(OpcodeStr, 6448 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6449 [(set _.RC:$dst, 6450 (OpNode _.RC:$src1, 6451 (_.VT (bitconvert 6452 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>, 6453 Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>, EVEX_4V; 6454} 6455 6456// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in 6457// SSE1. And MOVLPS pattern is even more complex. 
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
                                          v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                          v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
                                          v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
                                          v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
                    (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
}

// Store the high/low 64-bit half of an XMM register to memory.
let SchedRW = [WriteFStore] in {
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhps\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt
                                            (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
                                                       (bc_v2f64 (v4f32 VR128X:$src))),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt
                                            (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlps\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt (v2f64 VR128X:$src),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
} // SchedRW

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(store (f64 (extractelt
                         (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                         (iPTR 0))), addr:$dst),
            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
}
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//

// Packed FMA, 213 operand order: r (reg), m (mem) and mb (broadcast) forms.
// The DAG is (OpNode src2, src1, src3), matching the 213 multiplicand order.
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
             _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
             AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}

// 213 form with an explicit static-rounding operand (512-bit only).
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Instantiate the 213 family: 512-bit (with rounding) plus VLX-gated
// 256/128-bit widths.
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                      _.info512, Suff>,
                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512, Suff>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// Single- and double-precision variants of a 213 FMA opcode.
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f32_info, "PS">;
    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f64_info, "PD">,
                                      VEX_W;
}

defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;


// Packed FMA, 231 operand order: DAG is (OpNode src2, src3, src1).
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
          vselect, 1>, AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
           "$src2, ${src3}"##_.BroadcastStr,
           (_.VT (OpNode _.RC:$src2,
                         (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                         _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
           Sched<[sched.Folded, ReadAfterLd]>;
  }
}

// 231 form with static rounding (512-bit only).
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
           1, 1, vselect, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Instantiate the 231 family at 512-bit (with rounding) and VLX widths.
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                      _.info512, Suff>,
                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512, Suff>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// Single- and double-precision variants of a 231 FMA opcode.
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd > {
    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f32_info, "PS">;
    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f64_info, "PD">,
                                      VEX_W;
}

defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;

// Packed FMA, 132 operand order.
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
           "$src2, ${src3}"##_.BroadcastStr,
           (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                         _.RC:$src1, _.RC:$src2)), 1, 0>,
           AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}

// 132 form with static rounding (512-bit only).
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
           1, 1, vselect, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Instantiate the 132 family at 512-bit (with rounding) and VLX widths.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                      _.info512, Suff>,
                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512, Suff>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// Single- and double-precision variants of a 132 FMA opcode.
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd > {
    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f32_info, "PS">;
    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f64_info, "PD">,
                                      VEX_W;
}

defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;

// Scalar FMA
// Intrinsic (*_Int) forms select via patterns elsewhere (null_frag here);
// the isCodeGenOnly forms carry the scalar-register DAG patterns passed in
// as RHS_r / RHS_m / RHS_b.
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;

  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                        (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                        !strconcat(OpcodeStr,
                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
    def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
                        (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                        !strconcat(OpcodeStr,
                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;

    def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                         !strconcat(OpcodeStr,
                                    "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                         !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                         Sched<[SchedWriteFMA.Scl]>;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}

multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for intrinsic are in 123 order to preserve passthru
                // semantics.
6791 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1, 6792 _.FRC:$src3))), 6793 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1, 6794 (_.ScalarLdFrag addr:$src3)))), 6795 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1, 6796 _.FRC:$src3, (i32 imm:$rc)))), 0>; 6797 6798 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _, 6799 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3, 6800 _.FRC:$src1))), 6801 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, 6802 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))), 6803 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3, 6804 _.FRC:$src1, (i32 imm:$rc)))), 1>; 6805 6806 // One pattern is 312 order so that the load is in a different place from the 6807 // 213 and 231 patterns this helps tablegen's duplicate pattern detection. 6808 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _, 6809 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3, 6810 _.FRC:$src2))), 6811 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3), 6812 _.FRC:$src1, _.FRC:$src2))), 6813 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3, 6814 _.FRC:$src2, (i32 imm:$rc)))), 1>; 6815 } 6816} 6817 6818multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132, 6819 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> { 6820 let Predicates = [HasAVX512] in { 6821 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode, 6822 OpNodeRnd, f32x_info, "SS">, 6823 EVEX_CD8<32, CD8VT1>, VEX_LIG; 6824 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode, 6825 OpNodeRnd, f64x_info, "SD">, 6826 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W; 6827 } 6828} 6829 6830defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>; 6831defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>; 6832defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>; 
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;

// Selection patterns mapping "insert scalar FMA result into the low element
// of an XMM register" DAGs onto the *_Int scalar FMA instructions (which
// were declared with null_frag patterns above). Groups, in order:
//   - unmasked register and memory forms (213/231/132 operand orders),
//   - merge-masked (X86selects with the passthru = original low element),
//   - zero-masked (X86selects with passthru = ZeroFP),
//   - the same for the explicit-rounding RndOp node (Zrb_* instructions).
// The operand order inside each (Op ...) determines which form (213/231/132)
// is selected; memory operands are placed to keep the patterns distinct.
multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
                                      string Suffix, SDNode Move,
                                      X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    // Merge-masked patterns: the false value of the select is the original
    // low element of $src1, matching the instructions' tied passthru.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     _.FRC:$src3),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2, _.FRC:$src3,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Zero-masked patterns: the false value of the select is ZeroFP.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     _.FRC:$src3),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2, _.FRC:$src3,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Patterns with rounding mode.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 imm:$rc)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 imm:$rc)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        _.FRC:$src3, (i32 imm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        (i32 imm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        _.FRC:$src3, (i32 imm:$rc)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        (i32 imm:$rc)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
  }
}

defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;

//===----------------------------------------------------------------------===//
// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
// IFMA52 register/memory/broadcast variants for one vector width.
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode have the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
         AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr ),
           (OpNode _.RC:$src2,
                   (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                   _.RC:$src1)>,
           AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
} // Constraints = "$src1 = $dst"

// Expands one IFMA52 opcode across ZMM (IFMA) and YMM/XMM (IFMA + VLX).
multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                   VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                   VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512  Scalar convert from sign integer to float/double
//===----------------------------------------------------------------------===//

// Scalar int->FP convert: pattern-less FRC forms for assembly/disassembly,
// plus codegen-only *_Int forms on the full XMM class carrying the
// current-rounding (FROUND_CURRENT) patterns.
multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched,
                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
                    X86MemOperand x86memop, PatFrag ld_frag, string asm> {
  let hasSideEffects = 0 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, x86memop:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  } // hasSideEffects = 0
  let isCodeGenOnly = 1 in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                   (ins DstVT.RC:$src1, SrcRC:$src2),
                   !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set DstVT.RC:$dst,
                         (OpNode (DstVT.VT DstVT.RC:$src1),
                                  SrcRC:$src2,
                                  (i32 FROUND_CURRENT)))]>,
                   EVEX_4V, Sched<[sched]>;

    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                   (ins DstVT.RC:$src1, x86memop:$src2),
                   !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set DstVT.RC:$dst,
                         (OpNode (DstVT.VT DstVT.RC:$src1),
                                  (ld_frag addr:$src2),
                                  (i32 FROUND_CURRENT)))]>,
                   EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  }//isCodeGenOnly = 1
}

// Explicit static rounding-control (EVEX.b + $rc) register variant.
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm> {
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
              !strconcat(asm,
                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
              [(set DstVT.RC:$dst,
                    (OpNode (DstVT.VT DstVT.RC:$src1),
                             SrcRC:$src2,
                             (i32 imm:$rc)))]>,
              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Combines the rounding and non-rounding variants of one convert.
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag, string asm> {
  defm NAME : avx512_vcvtsi_round<opc, OpNode, sched, SrcRC, DstVT, asm>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm>, VEX_LIG;
}

let Predicates = [HasAVX512] in {
defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A,  X86SintToFpRnd, WriteCvtI2SS, GR32,
                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
                                 XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A,  X86SintToFpRnd, WriteCvtI2SS, GR64,
                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
                                 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ  : avx512_vcvtsi_common<0x2A,  X86SintToFpRnd, WriteCvtI2SD, GR32,
                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
                                 XD, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A,  X86SintToFpRnd, WriteCvtI2SD, GR64,
                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
                                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;

// sint_to_fp selection: the tied first operand is dead, so feed IMPLICIT_DEF.
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B,  X86UintToFpRnd, WriteCvtI2SS, GR32,
                                  v4f32x_info, i32mem, loadi32,
                                  "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B,  X86UintToFpRnd, WriteCvtI2SS, GR64,
                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// Note: no rounding variant here (avx512_vcvtsi, not _common).
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B,  X86UintToFpRnd, WriteCvtI2SD, GR32, v2f64x_info,
                                  i32mem, loadi32, "cvtusi2sd{l}">,
                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B,  X86UintToFpRnd, WriteCvtI2SD, GR64,
                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
} // Predicates = [HasAVX512]

//===----------------------------------------------------------------------===//
// AVX-512  Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//

// Scalar FP->int convert with current/static rounding. When CodeGenOnly=0
// (via avx512_cvt_s_int_round_aliases below) the memory form is also
// exposed to the assembler/disassembler.
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr,
                                  bit CodeGenOnly = 1> {
  let Predicates = [HasAVX512] in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))]>,
                EVEX, VEX_LIG, Sched<[sched]>;
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
                 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                 Sched<[sched]>;
    let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode
                      (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
                      (i32 FROUND_CURRENT)))]>,
                EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;

    def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
            (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
    def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
            (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  } // Predicates = [HasAVX512]
}

// Same as above, plus an AT&T alias for the memory form.
multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
                                          X86VectorVTInfo DstVT, SDNode OpNode,
                                          X86FoldableSchedWrite sched, string asm,
                                          string aliasStr> :
  avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, sched, asm, aliasStr, 0> {
  let Predicates = [HasAVX512] in {
    def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
            (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
                                SrcVT.IntScalarMemOp:$src), 0, "att">;
  } // Predicates = [HasAVX512]
}

// Convert float/double to signed/unsigned int 32/64
defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
                                   X86cvts2si, WriteCvtSS2I, "cvtss2si", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
                                   X86cvts2si, WriteCvtSS2I, "cvtss2si", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info,
                                   X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info,
                                   X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
                                   X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
                                   X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ:   avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info,
                                   X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info,
                                   X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;

// The SSE version of these instructions are disabled for AVX512.
// Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
let Predicates = [HasAVX512] in {
  def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
            (VCVTSS2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
            (VCVTSS2SIZrm_Int sse_load_f32:$src)>;
  def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
            (VCVTSS2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
            (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
            (VCVTSD2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
            (VCVTSD2SIZrm_Int sse_load_f64:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
            (VCVTSD2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
            (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>;
} // HasAVX512

// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]

// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDNode OpNode,
                            SDNode OpNodeRnd, X86FoldableSchedWrite sched,
                            string aliasStr, bit CodeGenOnly = 1>{
let Predicates = [HasAVX512] in {
  // Codegen-only FRC forms carrying the plain fp_to_{s,u}int patterns.
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
              EVEX, Sched<[sched]>;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
              EVEX, Sched<[sched.Folded, ReadAfterLd]>;
  }

  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
            [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
                                  (i32 FROUND_CURRENT)))]>,
            EVEX, VEX_LIG, Sched<[sched]>;
  // SAE (suppress-all-exceptions) variant.
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
             !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
             [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
                                   (i32 FROUND_NO_EXC)))]>,
             EVEX,VEX_LIG , EVEX_B, Sched<[sched]>;
  let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
              (ins _SrcRC.IntScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNodeRnd
                                     (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src),
                                     (i32 FROUND_CURRENT)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;

  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
} //HasAVX512
}

// Truncating convert plus an AT&T alias for the memory form (CodeGenOnly=0).
multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
                                     X86VectorVTInfo _SrcRC,
                                     X86VectorVTInfo _DstRC, SDNode OpNode,
                                     SDNode OpNodeRnd, X86FoldableSchedWrite sched,
                                     string aliasStr> :
  avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeRnd, sched,
                   aliasStr, 0> {
let Predicates = [HasAVX512] in {
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
                              _SrcRC.IntScalarMemOp:$src), 0, "att">;
}
}

defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{l}">,
                        XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{q}">,
                        VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{l}">,
                        XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{q}">,
                        VEX_W, XD, EVEX_CD8<64, CD8VT1>;

defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{l}">,
                        XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{q}">,
                        XS,VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{l}">,
                        XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{q}">,
                        XD, VEX_W, EVEX_CD8<64, CD8VT1>;

// Map the SSE truncating-convert intrinsics onto the AVX-512 instructions.
let Predicates = [HasAVX512] in {
  def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
            (VCVTTSS2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
            (VCVTTSS2SIZrm_Int ssmem:$src)>;
  def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
            (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
            (VCVTTSS2SI64Zrm_Int ssmem:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
            (VCVTTSD2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
            (VCVTTSD2SIZrm_Int sdmem:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
            (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
            (VCVTTSD2SI64Zrm_Int sdmem:$src)>;
} // HasAVX512

//===----------------------------------------------------------------------===//
// AVX-512  Convert from float to double and back
//===----------------------------------------------------------------------===//

multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2),
                                       (i32 FROUND_CURRENT)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                  (_Src.VT _Src.ScalarIntMemCPat:$src2),
                                  (i32 FROUND_CURRENT)))>,
                         EVEX_4V, VEX_LIG,
                         Sched<[sched.Folded, ReadAfterLd]>;

  // Non-intrinsic forms operating on scalar FRC registers. Codegen-only and
  // carry no ISel patterns ([]); selection happens through separate Pat<>s.
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
  }
}

// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                    X86FoldableSchedWrite sched> {
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2),
                                         (i32 FROUND_NO_EXC)))>,
                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}

// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 7578 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr, 7579 "$rc, $src2, $src1", "$src1, $src2, $rc", 7580 (_.VT (OpNodeRnd (_.VT _.RC:$src1), 7581 (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>, 7582 EVEX_4V, VEX_LIG, Sched<[sched]>, 7583 EVEX_B, EVEX_RC; 7584} 7585multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr, 7586 SDNode OpNodeRnd, X86FoldableSchedWrite sched, 7587 X86VectorVTInfo _src, X86VectorVTInfo _dst> { 7588 let Predicates = [HasAVX512] in { 7589 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>, 7590 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src, 7591 OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD; 7592 } 7593} 7594 7595multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, 7596 X86FoldableSchedWrite sched, 7597 X86VectorVTInfo _src, X86VectorVTInfo _dst> { 7598 let Predicates = [HasAVX512] in { 7599 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>, 7600 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>, 7601 EVEX_CD8<32, CD8VT1>, XS; 7602 } 7603} 7604defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", 7605 X86froundRnd, WriteCvtSD2SS, f64x_info, 7606 f32x_info>; 7607defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", 7608 X86fpextRnd, WriteCvtSS2SD, f32x_info, 7609 f64x_info>; 7610 7611def : Pat<(f64 (fpextend FR32X:$src)), 7612 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>, 7613 Requires<[HasAVX512]>; 7614def : Pat<(f64 (fpextend (loadf32 addr:$src))), 7615 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>, 7616 Requires<[HasAVX512, OptForSize]>; 7617 7618def : Pat<(f64 (extloadf32 addr:$src)), 7619 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>, 7620 Requires<[HasAVX512, OptForSize]>; 7621 7622def : Pat<(f64 (extloadf32 addr:$src)), 7623 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>, 7624 
          Requires<[HasAVX512, OptForSpeed]>;

def : Pat<(f32 (fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
           Requires<[HasAVX512]>;

// Select the intrinsic (element-0) forms when the round/extend result is
// merged back into the destination vector via a scalar move.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
// and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//

// Common reg/mem/broadcast-mem forms shared by all vector int<->fp converts.
// Broadcast and Alias strings are overridable for the narrow (128-bit) cases.
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDNode OpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp> {

  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src)))>,
                         EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
                         (_.VT (OpNode (_Src.VT
                             (bitconvert (_Src.LdFrag addr:$src)))))>,
                         EVEX, Sched<[sched.Folded]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src), OpcodeStr,
                         "${src}"##Broadcast, "${src}"##Broadcast,
                         (_.VT (OpNode (_Src.VT
                                  (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
                            ))>, EVEX, EVEX_B,
                         Sched<[sched.Folded]>;
}
// Conversion with SAE - suppress all
exceptions 7675multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7676 X86VectorVTInfo _Src, SDNode OpNodeRnd, 7677 X86FoldableSchedWrite sched> { 7678 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 7679 (ins _Src.RC:$src), OpcodeStr, 7680 "{sae}, $src", "$src, {sae}", 7681 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), 7682 (i32 FROUND_NO_EXC)))>, 7683 EVEX, EVEX_B, Sched<[sched]>; 7684} 7685 7686// Conversion with rounding control (RC) 7687multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7688 X86VectorVTInfo _Src, SDNode OpNodeRnd, 7689 X86FoldableSchedWrite sched> { 7690 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 7691 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr, 7692 "$rc, $src", "$src, $rc", 7693 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>, 7694 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>; 7695} 7696 7697// Extend Float to Double 7698multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr, 7699 X86SchedWriteWidths sched> { 7700 let Predicates = [HasAVX512] in { 7701 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info, 7702 fpextend, sched.ZMM>, 7703 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info, 7704 X86vfpextRnd, sched.ZMM>, EVEX_V512; 7705 } 7706 let Predicates = [HasVLX] in { 7707 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info, 7708 X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128; 7709 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend, 7710 sched.YMM>, EVEX_V256; 7711 } 7712} 7713 7714// Truncate Double to Float 7715multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> { 7716 let Predicates = [HasAVX512] in { 7717 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched.ZMM>, 7718 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info, 7719 X86vfproundRnd, sched.ZMM>, EVEX_V512; 7720 } 7721 let Predicates = [HasVLX] in { 
7722 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info, 7723 X86vfpround, sched.XMM, "{1to2}", "{x}">, EVEX_V128; 7724 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround, 7725 sched.YMM, "{1to4}", "{y}">, EVEX_V256; 7726 7727 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", 7728 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>; 7729 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", 7730 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">; 7731 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}", 7732 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>; 7733 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}", 7734 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">; 7735 } 7736} 7737 7738defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>, 7739 VEX_W, PD, EVEX_CD8<64, CD8VF>; 7740defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>, 7741 PS, EVEX_CD8<32, CD8VH>; 7742 7743def : Pat<(v8f64 (extloadv8f32 addr:$src)), 7744 (VCVTPS2PDZrm addr:$src)>; 7745 7746let Predicates = [HasVLX] in { 7747 def : Pat<(X86vzmovl (v2f64 (bitconvert 7748 (v4f32 (X86vfpround (v2f64 VR128X:$src)))))), 7749 (VCVTPD2PSZ128rr VR128X:$src)>; 7750 def : Pat<(X86vzmovl (v2f64 (bitconvert 7751 (v4f32 (X86vfpround (loadv2f64 addr:$src)))))), 7752 (VCVTPD2PSZ128rm addr:$src)>; 7753 def : Pat<(v2f64 (extloadv2f32 addr:$src)), 7754 (VCVTPS2PDZ128rm addr:$src)>; 7755 def : Pat<(v4f64 (extloadv4f32 addr:$src)), 7756 (VCVTPS2PDZ256rm addr:$src)>; 7757} 7758 7759// Convert Signed/Unsigned Doubleword to Double 7760multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode, 7761 SDNode OpNode128, X86SchedWriteWidths sched> { 7762 // No rounding in this op 7763 let Predicates = [HasAVX512] in 7764 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode, 7765 sched.ZMM>, EVEX_V512; 7766 7767 
let Predicates = [HasVLX] in { 7768 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info, 7769 OpNode128, sched.XMM, "{1to2}", "", i64mem>, EVEX_V128; 7770 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode, 7771 sched.YMM>, EVEX_V256; 7772 } 7773} 7774 7775// Convert Signed/Unsigned Doubleword to Float 7776multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode, 7777 SDNode OpNodeRnd, X86SchedWriteWidths sched> { 7778 let Predicates = [HasAVX512] in 7779 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode, 7780 sched.ZMM>, 7781 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info, 7782 OpNodeRnd, sched.ZMM>, EVEX_V512; 7783 7784 let Predicates = [HasVLX] in { 7785 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode, 7786 sched.XMM>, EVEX_V128; 7787 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode, 7788 sched.YMM>, EVEX_V256; 7789 } 7790} 7791 7792// Convert Float to Signed/Unsigned Doubleword with truncation 7793multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7794 SDNode OpNodeRnd, X86SchedWriteWidths sched> { 7795 let Predicates = [HasAVX512] in { 7796 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, 7797 sched.ZMM>, 7798 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info, 7799 OpNodeRnd, sched.ZMM>, EVEX_V512; 7800 } 7801 let Predicates = [HasVLX] in { 7802 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, 7803 sched.XMM>, EVEX_V128; 7804 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, 7805 sched.YMM>, EVEX_V256; 7806 } 7807} 7808 7809// Convert Float to Signed/Unsigned Doubleword 7810multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7811 SDNode OpNodeRnd, X86SchedWriteWidths sched> { 7812 let Predicates = [HasAVX512] in { 7813 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, 
OpNode, 7814 sched.ZMM>, 7815 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info, 7816 OpNodeRnd, sched.ZMM>, EVEX_V512; 7817 } 7818 let Predicates = [HasVLX] in { 7819 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, 7820 sched.XMM>, EVEX_V128; 7821 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, 7822 sched.YMM>, EVEX_V256; 7823 } 7824} 7825 7826// Convert Double to Signed/Unsigned Doubleword with truncation 7827multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7828 SDNode OpNodeRnd, X86SchedWriteWidths sched> { 7829 let Predicates = [HasAVX512] in { 7830 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, 7831 sched.ZMM>, 7832 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info, 7833 OpNodeRnd, sched.ZMM>, EVEX_V512; 7834 } 7835 let Predicates = [HasVLX] in { 7836 // we need "x"/"y" suffixes in order to distinguish between 128 and 256 7837 // memory forms of these instructions in Asm Parser. They have the same 7838 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly 7839 // due to the same reason. 
  defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                             OpNode, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
  defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                             sched.YMM, "{1to4}", "{y}">, EVEX_V256;

  // Explicit x/y-suffixed aliases so the memory forms are unambiguous.
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
  }
}

// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
  defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
                             sched.XMM, "{1to2}", "{x}">, EVEX_V128;
  defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                             sched.YMM, "{1to4}", "{y}">, EVEX_V256;

  // Explicit x/y-suffixed aliases so the memory forms are unambiguous.
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
  }
}

// Convert Double to Signed/Unsigned Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Double to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates =
[HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}

// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode
OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Float
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
7989 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128, 7990 sched.XMM, "{1to2}", "{x}">, EVEX_V128, 7991 NotEVEX2VEXConvertible; 7992 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode, 7993 sched.YMM, "{1to4}", "{y}">, EVEX_V256, 7994 NotEVEX2VEXConvertible; 7995 7996 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", 7997 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>; 7998 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", 7999 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">; 8000 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}", 8001 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>; 8002 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}", 8003 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">; 8004 } 8005} 8006 8007defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP, 8008 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; 8009 8010defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp, 8011 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, 8012 PS, EVEX_CD8<32, CD8VF>; 8013 8014defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si, 8015 X86cvttp2siRnd, SchedWriteCvtPS2DQ>, 8016 XS, EVEX_CD8<32, CD8VF>; 8017 8018defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si, 8019 X86cvttp2siRnd, SchedWriteCvtPD2DQ>, 8020 PD, VEX_W, EVEX_CD8<64, CD8VF>; 8021 8022defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui, 8023 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PS, 8024 EVEX_CD8<32, CD8VF>; 8025 8026defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui, 8027 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, 8028 PS, VEX_W, EVEX_CD8<64, CD8VF>; 8029 8030defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, 8031 X86VUintToFP, SchedWriteCvtDQ2PD>, XS, 8032 EVEX_CD8<32, CD8VH>; 8033 8034defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", 
uint_to_fp, 8035 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD, 8036 EVEX_CD8<32, CD8VF>; 8037 8038defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, 8039 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, 8040 EVEX_CD8<32, CD8VF>; 8041 8042defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, 8043 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD, 8044 VEX_W, EVEX_CD8<64, CD8VF>; 8045 8046defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, 8047 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, 8048 PS, EVEX_CD8<32, CD8VF>; 8049 8050defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, 8051 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8052 PS, EVEX_CD8<64, CD8VF>; 8053 8054defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, 8055 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8056 PD, EVEX_CD8<64, CD8VF>; 8057 8058defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, 8059 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, 8060 EVEX_CD8<32, CD8VH>; 8061 8062defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, 8063 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8064 PD, EVEX_CD8<64, CD8VF>; 8065 8066defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, 8067 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD, 8068 EVEX_CD8<32, CD8VH>; 8069 8070defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si, 8071 X86cvttp2siRnd, SchedWriteCvtPD2DQ>, VEX_W, 8072 PD, EVEX_CD8<64, CD8VF>; 8073 8074defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si, 8075 X86cvttp2siRnd, SchedWriteCvtPS2DQ>, PD, 8076 EVEX_CD8<32, CD8VH>; 8077 8078defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui, 8079 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, VEX_W, 8080 PD, EVEX_CD8<64, CD8VF>; 8081 8082defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui, 8083 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PD, 8084 EVEX_CD8<32, CD8VH>; 8085 8086defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp, 8087 
X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS, 8088 EVEX_CD8<64, CD8VF>; 8089 8090defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp, 8091 X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS, 8092 EVEX_CD8<64, CD8VF>; 8093 8094defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP, 8095 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS, 8096 EVEX_CD8<64, CD8VF>; 8097 8098defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP, 8099 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD, 8100 EVEX_CD8<64, CD8VF>; 8101 8102let Predicates = [HasAVX512] in { 8103 def : Pat<(v16i32 (fp_to_sint (v16f32 VR512:$src))), 8104 (VCVTTPS2DQZrr VR512:$src)>; 8105 def : Pat<(v16i32 (fp_to_sint (loadv16f32 addr:$src))), 8106 (VCVTTPS2DQZrm addr:$src)>; 8107 8108 def : Pat<(v16i32 (fp_to_uint (v16f32 VR512:$src))), 8109 (VCVTTPS2UDQZrr VR512:$src)>; 8110 def : Pat<(v16i32 (fp_to_uint (loadv16f32 addr:$src))), 8111 (VCVTTPS2UDQZrm addr:$src)>; 8112 8113 def : Pat<(v8i32 (fp_to_sint (v8f64 VR512:$src))), 8114 (VCVTTPD2DQZrr VR512:$src)>; 8115 def : Pat<(v8i32 (fp_to_sint (loadv8f64 addr:$src))), 8116 (VCVTTPD2DQZrm addr:$src)>; 8117 8118 def : Pat<(v8i32 (fp_to_uint (v8f64 VR512:$src))), 8119 (VCVTTPD2UDQZrr VR512:$src)>; 8120 def : Pat<(v8i32 (fp_to_uint (loadv8f64 addr:$src))), 8121 (VCVTTPD2UDQZrm addr:$src)>; 8122} 8123 8124let Predicates = [HasVLX] in { 8125 def : Pat<(v4i32 (fp_to_sint (v4f32 VR128X:$src))), 8126 (VCVTTPS2DQZ128rr VR128X:$src)>; 8127 def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))), 8128 (VCVTTPS2DQZ128rm addr:$src)>; 8129 8130 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src))), 8131 (VCVTTPS2UDQZ128rr VR128X:$src)>; 8132 def : Pat<(v4i32 (fp_to_uint (loadv4f32 addr:$src))), 8133 (VCVTTPS2UDQZ128rm addr:$src)>; 8134 8135 def : Pat<(v8i32 (fp_to_sint (v8f32 VR256X:$src))), 8136 (VCVTTPS2DQZ256rr VR256X:$src)>; 8137 def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))), 8138 (VCVTTPS2DQZ256rm addr:$src)>; 8139 
8140 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src))), 8141 (VCVTTPS2UDQZ256rr VR256X:$src)>; 8142 def : Pat<(v8i32 (fp_to_uint (loadv8f32 addr:$src))), 8143 (VCVTTPS2UDQZ256rm addr:$src)>; 8144 8145 def : Pat<(v4i32 (fp_to_sint (v4f64 VR256X:$src))), 8146 (VCVTTPD2DQZ256rr VR256X:$src)>; 8147 def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))), 8148 (VCVTTPD2DQZ256rm addr:$src)>; 8149 8150 def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src))), 8151 (VCVTTPD2UDQZ256rr VR256X:$src)>; 8152 def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))), 8153 (VCVTTPD2UDQZ256rm addr:$src)>; 8154} 8155 8156let Predicates = [HasDQI] in { 8157 def : Pat<(v8i64 (fp_to_sint (v8f32 VR256X:$src))), 8158 (VCVTTPS2QQZrr VR256X:$src)>; 8159 def : Pat<(v8i64 (fp_to_sint (loadv8f32 addr:$src))), 8160 (VCVTTPS2QQZrm addr:$src)>; 8161 8162 def : Pat<(v8i64 (fp_to_uint (v8f32 VR256X:$src))), 8163 (VCVTTPS2UQQZrr VR256X:$src)>; 8164 def : Pat<(v8i64 (fp_to_uint (loadv8f32 addr:$src))), 8165 (VCVTTPS2UQQZrm addr:$src)>; 8166 8167 def : Pat<(v8i64 (fp_to_sint (v8f64 VR512:$src))), 8168 (VCVTTPD2QQZrr VR512:$src)>; 8169 def : Pat<(v8i64 (fp_to_sint (loadv8f64 addr:$src))), 8170 (VCVTTPD2QQZrm addr:$src)>; 8171 8172 def : Pat<(v8i64 (fp_to_uint (v8f64 VR512:$src))), 8173 (VCVTTPD2UQQZrr VR512:$src)>; 8174 def : Pat<(v8i64 (fp_to_uint (loadv8f64 addr:$src))), 8175 (VCVTTPD2UQQZrm addr:$src)>; 8176} 8177 8178let Predicates = [HasDQI, HasVLX] in { 8179 def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))), 8180 (VCVTTPS2QQZ256rr VR128X:$src)>; 8181 def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))), 8182 (VCVTTPS2QQZ256rm addr:$src)>; 8183 8184 def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src))), 8185 (VCVTTPS2UQQZ256rr VR128X:$src)>; 8186 def : Pat<(v4i64 (fp_to_uint (loadv4f32 addr:$src))), 8187 (VCVTTPS2UQQZ256rm addr:$src)>; 8188 8189 def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src))), 8190 (VCVTTPD2QQZ128rr VR128X:$src)>; 8191 def : Pat<(v2i64 (fp_to_sint (loadv2f64 addr:$src))), 8192 
            (VCVTTPD2QQZ128rm addr:$src)>;

  def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src))),
            (VCVTTPD2UQQZ128rr VR128X:$src)>;
  def : Pat<(v2i64 (fp_to_uint (loadv2f64 addr:$src))),
            (VCVTTPD2UQQZ128rm addr:$src)>;

  def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src))),
            (VCVTTPD2QQZ256rr VR256X:$src)>;
  def : Pat<(v4i64 (fp_to_sint (loadv4f64 addr:$src))),
            (VCVTTPD2QQZ256rm addr:$src)>;

  def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src))),
            (VCVTTPD2UQQZ256rr VR256X:$src)>;
  def : Pat<(v4i64 (fp_to_uint (loadv4f64 addr:$src))),
            (VCVTTPD2UQQZ256rm addr:$src)>;
}

// Without AVX512VL only the 512-bit unsigned converts exist, so narrower
// operations are widened: insert the source into a zmm via INSERT_SUBREG /
// IMPLICIT_DEF, run the 512-bit instruction, then extract the low subvector.
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
           (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
           (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;

def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
          (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
           (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
           (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
           (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;

def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
           (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1,
                                  sub_xmm)))), sub_xmm)>;
}

// VCVT*PD2DQ/UDQ on 128-bit sources only writes the low 64 bits of the xmm
// result and zeroes the rest, so an explicit X86vzmovl (zero upper) wrapped
// around the convert is already satisfied by the instruction itself.
let Predicates = [HasAVX512, HasVLX] in {
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
            (VCVTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
            (VCVTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
            (VCVTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
            (VCVTTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
            (VCVTTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
            (VCVTTPD2UDQZ128rr VR128X:$src)>;

  // VCVT(U)DQ2PDZ128rm only reads 64 bits of memory, so it can fold a scalar
  // i64 load (or zero-extending vector load) used as the low half of a v4i32.
  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;

  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
}

// Load-folded forms of 512-bit fpround / fpextend.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;
  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;
}

// As above for vzmovl: VCVT(U)QQ2PSZ128 zeroes the upper half of the result.
let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(X86vzmovl (v2f64 (bitconvert
                               (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
            (VCVTQQ2PSZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2f64 (bitconvert
                               (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
            (VCVTUQQ2PSZ128rr VR128X:$src)>;
}

// DQI without VLX: widen narrow QQ/UQQ converts to the 512-bit instructions,
// same INSERT_SUBREG/EXTRACT_SUBREG technique as the NoVLX block above.
let Predicates = [HasDQI, NoVLX] in {
def :
      // v2i64 result lives in the low xmm of the widened v8i64 result.
      Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
           (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;

def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
           (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;

def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

// int -> FP direction, widened the same way.
def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;

def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;

def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;
}

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//

// vcvtph2ps: half -> single conversion, register and load-folded (masked)
// forms. The memory form loads an integer vector and bitcasts it to the
// i16 source element type.
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, PatFrag ld_frag,
                           X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
                            T8PD, Sched<[sched]>;
  defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT
                                          (bitconvert
                                           (ld_frag addr:$src))))>,
                            T8PD, Sched<[sched.Folded]>;
}

// Register-only SAE (suppress-all-exceptions) variant, encoded with EVEX_B.
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src), "vcvtph2ps",
                             "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psRnd (_src.VT _src.RC:$src),
                                             (i32 FROUND_NO_EXC))>,
                             T8PD, EVEX_B, Sched<[sched]>;
}

let Predicates = [HasAVX512] in
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
                                    WriteCvtPH2PSZ>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

let Predicates = [HasVLX] in {
  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                                       loadv2i64, WriteCvtPH2PSY>, EVEX, EVEX_V256,
                                       EVEX_CD8<32, CD8VH>;
  defm VCVTPH2PSZ128 :
                       avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                                       loadv2i64, WriteCvtPH2PS>, EVEX, EVEX_V128,
                                       EVEX_CD8<32, CD8VH>;

  // Pattern match vcvtph2ps of a scalar i64 load.
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}

// vcvtps2ph: single -> half conversion with an immediate rounding-control
// operand ($src2). Store forms are asm-only (empty pattern list) and marked
// mayStore; the masked store form (mrk) cannot be memory-folded.
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
  defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
                   (ins _src.RC:$src1, i32u8imm:$src2),
                   "vcvtps2ph", "$src2, $src1", "$src1, $src2",
                   (X86cvtps2ph (_src.VT _src.RC:$src1),
                                (i32 imm:$src2)), 0, 0>,
                   AVX512AIi8Base, Sched<[RR]>;
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               Sched<[MR]>;
    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
               EVEX_K, Sched<[MR]>, NotMemoryFoldable;
  }
}

// Asm-only SAE variant of vcvtps2ph (no patterns).
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               SchedWrite Sched> {
  let hasSideEffects = 0 in
  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                   (outs _dest.RC:$dst),
                   (ins _src.RC:$src1, i32u8imm:$src2),
                   "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
                   EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
}

let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
  let Predicates = [HasVLX] in {
    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                         WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                                         EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                         WriteCvtPS2PH, WriteCvtPS2PHSt>,
                                         EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
  }

  // Select the store forms when the converted result is immediately stored.
  // The 128-bit convert only produces 64 meaningful bits, matched here as a
  // scalar f64/i64 extract of element 0.
  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
  def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
}

// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasVLX] in {
  // Use MXCSR.RC for rounding instead of explicitly specifying the default
  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
  // configurations we support (the default). However, falling back to MXCSR is
  // more consistent with other instructions, which are always controlled by it.
  // It's encoded as 0b100.
  // Scalar half<->float conversions go through the 128-bit vector
  // instructions: copy the scalar into an xmm, convert, and extract.
  // The immediate 4 (0b100) selects MXCSR-controlled rounding (see above).
  def : Pat<(fp_to_f16 FR32X:$src),
            (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
              (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;

  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
              (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;

  // Round-trip f32 -> f16 -> f32 stays in vector registers.
  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
             (v8i16 (VCVTPS2PHZ128rr
              (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
}

// Unordered/Ordered scalar fp compare with SAE and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                              string OpcodeStr, X86FoldableSchedWrite sched> {
  let hasSideEffects = 0 in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}

// SAE forms of v(u)comiss/v(u)comisd (asm only; EFLAGS def declared here).
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}

// EVEX-encoded (u)comiss/(u)comisd, reusing the SSE1/2 multiclasses.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                 EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                 "ucomisd", WriteFCom>, PD, EVEX,
                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  let Pattern = []<dag> in {
    defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
                                  "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                  EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
                                  "comisd", WriteFCom>, PD, EVEX,
                                   VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
  // Intrinsic (vector-operand) forms, isCodeGenOnly to avoid asm ambiguity.
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                          sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
                          EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                          sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                          sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
                          EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                          sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}

/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
// Scalar 14-bit-precision reciprocal / reciprocal-sqrt approximations:
// register-register and load-folded intrinsic forms.
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1),
                          _.ScalarIntMemCPat:$src2)>, EVEX_4V,
                          Sched<[sched.Folded, ReadAfterLd]>;
}
}

defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
                               f32x_info>, EVEX_CD8<32, CD8VT1>,
                               T8PD;
defm VRCP14SDZ :
                 avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
                               T8PD;
defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f32x_info>,
                                 EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
                                 EVEX_CD8<64, CD8VT1>, T8PD;

/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Packed 14-bit-precision approximations: reg, load-folded, and embedded-
// broadcast (EVEX_B) forms.
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
                          (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
                          Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                          (OpNode (_.VT
                            (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
                          Sched<[sched.Folded, ReadAfterLd]>;
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.ScalarMemOp:$src), OpcodeStr,
                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                          (OpNode (_.VT
                            (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
                          EVEX, T8PD, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}

// Instantiates the packed forms at all three vector widths (128/256 gated on
// VLX below).
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
                           v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
                           v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
                                OpNode, sched.XMM, v4f32x_info>,
                                EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
                                OpNode, sched.YMM, v8f32x_info>,
                                EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
                                OpNode, sched.XMM, v2f64x_info>,
                                EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
                                OpNode, sched.YMM, v4f64x_info>,
                                EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}

defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;

/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Scalar 28-bit-precision approximations: reg, SAE (EVEX_B), and load-folded
// forms. The SDNode carries an explicit rounding-mode operand
// (FROUND_CURRENT = use MXCSR, FROUND_NO_EXC = SAE).
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in {
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           (i32 FROUND_CURRENT))>,
                           Sched<[sched]>;

  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            (i32 FROUND_NO_EXC))>, EVEX_B,
                            Sched<[sched]>;

  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                         (i32 FROUND_CURRENT))>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  }
}

multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        X86FoldableSchedWrite sched> {
  // Instantiates the ss/sd pair for one ER (exponential/reciprocal) opcode.
  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, sched>,
             EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, sched>,
             EVEX_CD8<64, CD8VT1>, VEX_W;
}

let Predicates = [HasERI] in {
  defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SchedWriteFRcp.Scl>,
                             T8PD, EVEX_4V;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s,
                               SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
}

// vgetexp reuses the same scalar multiclass but is baseline AVX512.
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds,
                            SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd

// Packed forms: reg, load-folded, and embedded-broadcast, all with
// MXCSR-controlled rounding (FROUND_CURRENT).
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in {
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>,
                         Sched<[sched]>;

  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT
                             (bitconvert (_.LdFrag addr:$src))),
                          (i32 FROUND_CURRENT))>,
                          Sched<[sched.Folded, ReadAfterLd]>;

  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.ScalarMemOp:$src), OpcodeStr,
                         "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                         (OpNode (_.VT
                                  (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                                 (i32 FROUND_CURRENT))>, EVEX_B,
                         Sched<[sched.Folded, ReadAfterLd]>;
  }
}
// Register-only SAE variant of the packed form.
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr,
                         "{sae}, $src", "$src, {sae}",
                         (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>,
                         EVEX_B, Sched<[sched]>;
}

// 512-bit ps/pd pair (normal + SAE forms) for one ER opcode.
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                      X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
             avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
             T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
             avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
             T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}

// 128/256-bit ps/pd forms (no SAE), used by vgetexp below.
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86SchedWriteWidths sched> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched.XMM>,
                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched.YMM>,
                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched.XMM>,
                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched.YMM>,
                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}

let Predicates = [HasERI] in {
  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SchedWriteFRsqrt>, EVEX;
  defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX;
  defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX;
}
// vgetexp: 512-bit via avx512_eri, 128/256-bit via avx512_fp_unaryop_packed.
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFRnd>,
               avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
                                        SchedWriteFRnd>, EVEX;

// Packed sqrt with explicit static rounding control ($rc, EVEX_RC).
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in
  defm rb: AVX512_maskable<opc, MRMSrcReg, _,
                         (outs _.RC:$dst),
                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc)))>,
                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Packed sqrt (ISD fsqrt): reg, load-folded, and embedded-broadcast forms.
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (fsqrt _.RC:$src))>, EVEX,
                         Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (fsqrt (_.VT
                           (bitconvert (_.LdFrag addr:$src))))>, EVEX,
                           Sched<[sched.Folded, ReadAfterLd]>;
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.ScalarMemOp:$src), OpcodeStr,
                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                          (fsqrt (_.VT
                            (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
                          EVEX, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}

// Instantiates packed sqrt at all widths; ps/pd carry separate sched classes.
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                sched.PS.ZMM, v16f32_info>,
                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                sched.PD.ZMM, v8f64_info>,
                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.XMM, v4f32x_info>,
                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.YMM, v8f32x_info>,
                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.XMM, v2f64x_info>,
                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.YMM, v4f64x_info>,
                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}

// Static-rounding forms exist only at 512 bits.
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
                                      sched.PS.ZMM, v16f32_info>,
                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
                                      sched.PD.ZMM, v8f64_info>,
                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}

// Scalar sqrt: intrinsic forms (r_Int/m_Int/rb_Int with rounding operand)
// plus isCodeGenOnly FR-register forms used by the scalar fsqrt patterns.
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (i32 FROUND_CURRENT))>,
                         Sched<[sched]>;
    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                    _.ScalarIntMemCPat:$src2,
                                    (i32 FROUND_CURRENT))>,
                         Sched<[sched.Folded, ReadAfterLd]>;
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1",
                          "$src1, $src2, $rc",
                          (X86fsqrtRnds (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (i32 imm:$rc))>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;

    // FR-register forms: no patterns here; selected by the Pat<>s below.
    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                Sched<[sched]>;
      let mayLoad = 1 in
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                  Sched<[sched.Folded, ReadAfterLd]>;
    }
  }

  // Scalar ISD fsqrt selects the FR form; $src1 (the pass-through upper
  // elements) is don't-care, hence IMPLICIT_DEF.
  let Predicates = [HasAVX512] in {
    def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
              (!cast<Instruction>(Name#Zr)
                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
  }

  // Fold the load only when optimizing for size (partial-register load
  // folding is otherwise avoided).
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(_.EltVT (fsqrt (load addr:$src))),
              (!cast<Instruction>(Name#Zm)
                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  }
}

multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
                        EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
                        EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}

defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;

defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;

// Scalar rndscale: intrinsic reg/SAE/mem forms plus isCodeGenOnly FR forms,
// and the ffloor/fceil/ftrunc/frint/fnearbyint lowering patterns below.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                           "$src3, $src2, $src1", "$src1, $src2, $src3",
                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            (i32 imm:$src3)))>,
                           Sched<[sched]>;

    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                           "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                           (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            (i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B,
                           Sched<[sched]>;

    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr,
                    "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (_.VT (X86RndScales _.RC:$src1,
                           _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
                    Sched<[sched.Folded, ReadAfterLd]>;

    // FR-register forms, pattern-less; selected by the Pat<>s below.
    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>, Sched<[sched]>;

      let mayLoad = 1 in
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                  []>, Sched<[sched.Folded, ReadAfterLd]>;
    }
  }

  // Rounding-node lowering. Immediate bits: low two bits select the rounding
  // mode (1=floor/down, 2=ceil/up, 3=truncate, 0=current MXCSR mode); bit 3
  // (0x8) suppresses precision exceptions, used by floor/ceil/trunc and
  // nearbyint but not rint.
  let Predicates = [HasAVX512] in {
    def : Pat<(ffloor _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src, (i32 0x9)))>;
    def : Pat<(fceil _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src, (i32 0xa)))>;
    def : Pat<(ftrunc _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src, (i32 0xb)))>;
    def : Pat<(frint _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src, (i32 0x4)))>;
    def : Pat<(fnearbyint _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src, (i32 0xc)))>;
  }

  // Load-folded versions of the same lowerings, only under OptForSize.
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src, (i32 0x9)))>;
    def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src, (i32 0xa)))>;
    def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src, (i32 0xb)))>;
    def : Pat<(frint (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src, (i32 0x4)))>;
    def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src, (i32 0xc)))>;
  }
}

defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
                                           SchedWriteFRnd.Scl, f32x_info>,
                                           AVX512AIi8Base, EVEX_4V,
                                           EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
                                           SchedWriteFRnd.Scl, f64x_info>,
                                           VEX_W, AVX512AIi8Base, EVEX_4V,
                                           EVEX_CD8<64, CD8VT1>;

// Fold a masked select around a scalar unary op into the masked (_Intk) or
// zero-masked (_Intkz) intrinsic form of the instruction named by OpcPrefix.
multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
                                dag OutMask, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    // Merge-masking: the false value comes from $dst's element 0.
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;

    // Zero-masking: the false value is +0.0.
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
               OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}
// Masked-scalar lowering for VSQRTSS/VSQRTSD: select a GPR-derived mask into
// the masked (merge and zero-masked) scalar sqrt intrinsic instructions.
defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
                            fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                            fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;

// Like avx512_masked_scalar, but for operations that lower to a masked
// VRNDSCALE* with a fixed rounding-control immediate ImmV (e.g. floor/ceil).
// Emits merge-masked (_Intk) and zero-masked (_Intkz) selection patterns.
multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move,
                                    X86VectorVTInfo _, PatLeaf ZeroFP,
                                    bits<8> ImmV, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    // Merge-masked: preserve element 0 of $dst where the mask is clear.
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
                                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
                                (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Intk)
               _.VT:$dst, VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;

    // Zero-masked: element 0 becomes +0.0 where the mask is clear.
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
                                (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Intkz)
               VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
  }
}

// Immediate 0x01 = round toward -inf (floor), 0x02 = round toward +inf (ceil).
defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
                                v4f32x_info, fp32imm0, 0x01, HasAVX512>;
defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
                                v4f32x_info, fp32imm0, 0x02, HasAVX512>;
defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
                                v2f64x_info, fp64imm0, 0x01, HasAVX512>;
defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
                                v2f64x_info, fp64imm0, 0x02, HasAVX512>;


//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------

// One VPMOV* truncating instruction set for a fixed (src, dest) vector pair:
// register form (maskable), plus unmasked/masked memory-destination forms.
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in
  defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
                            (ins SrcInfo.RC:$src1), OpcodeStr, "$src1", "$src1",
                            (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
                            EVEX, T8XS, Sched<[sched]>;

  // Memory-destination forms carry no ISel pattern here; the store patterns
  // are supplied separately by avx512_trunc_mr_lowering.
  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
                        (ins x86memop:$dst, SrcInfo.RC:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
                        EVEX, Sched<[sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
                         (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
                         OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
                         EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
  } // mayStore = 1, hasSideEffects = 0
}

// Selection patterns mapping (masked) truncating stores onto the
// memory-destination VPMOV* forms defined in avx512_trunc_common.
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    X86VectorVTInfo DestInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {

  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
                               addr:$dst, SrcInfo.RC:$src)>;

  def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
                        (SrcInfo.VT SrcInfo.RC:$src)),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
                               addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}

// Instantiates the 128/256-bit (VLX-gated) and 512-bit truncate variants.
// A distinct SDNode may be supplied per width because narrow sources can
// require the "in-vector" node variant (see the qb/qw/... wrappers below).
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512, X86FoldableSchedWrite sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{

  let Predicates = [HasVLX, prd] in {
    defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, sched,
                                   VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
               avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                                        truncFrag, mtruncFrag, NAME>, EVEX_V128;

    defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, sched,
                                   VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
               avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                                        truncFrag, mtruncFrag, NAME>, EVEX_V256;
  }
  let Predicates = [prd] in
    defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, sched,
                                VTSrcInfo.info512, DestInfoZ, x86memopZ>,
            avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                                     truncFrag, mtruncFrag, NAME>, EVEX_V512;
}

// qword -> byte truncate (vpmovqb family). The result is at most 8 bytes,
// so all widths produce a v16i8 register with only the low lanes valid.
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}

// qword -> word truncate (vpmovqw family).
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}

// qword -> dword truncate (vpmovqd family).
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}

// dword -> byte truncate (vpmovdb family).
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}

// dword -> word truncate (vpmovdw family).
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}

// word -> byte truncate (vpmovwb family); requires BWI.
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          sched, avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}

defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   trunc, WriteShuffle256,
                                  truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8>;

defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw",   trunc, WriteShuffle256,
                                  truncstorevi16, masked_truncstorevi16, X86vtrunc>;
defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16>;

defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd",   trunc, WriteShuffle256,
                                  truncstorevi32, masked_truncstorevi32, X86vtrunc>;
defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi32, masked_truncstore_s_vi32>;
defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi32, masked_truncstore_us_vi32>;

defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb",   trunc, WriteShuffle256,
                                  truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb",  X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8>;

defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw",   trunc, WriteShuffle256,
                                  truncstorevi16, masked_truncstorevi16, X86vtrunc>;
defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw",  X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16>;

defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb",   trunc, WriteShuffle256,
                                  truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb",  X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8>;

// Without VLX, widen the 256-bit source to 512 bits and use the Z-form
// instruction, then extract the low 128 bits of the result.
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
}

let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src, sub_ymm))), sub_xmm))>;
}

// One VPMOVSX/VPMOVZX extension instruction pair (reg and mem source) for a
// fixed (dest, src) vector combination.
multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                            (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                            (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                            EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                            (ins x86memop:$src), OpcodeStr, "$src", "$src",
                            (DestInfo.VT (LdFrag addr:$src))>,
                            EVEX, Sched<[sched.Folded]>;
  }
}

// byte -> word extension (vpmov[sz]xbw). The 128-bit form uses the
// "in-vector" node because only the low half of the source is consumed.
multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
              SDNode OpNode, SDNode InVecNode, string ExtTy,
              X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
                                      v16i8x_info, i64mem, LdFrag, InVecNode>,
                                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
                                      v16i8x_info, i128mem, LdFrag, OpNode>,
                                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
                                    v32i8x_info, i256mem, LdFrag, OpNode>,
                                    EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}

// byte -> dword extension (vpmov[sz]xbd).
multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
              SDNode OpNode, SDNode InVecNode, string ExtTy,
              X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                                      v16i8x_info, i32mem, LdFrag, InVecNode>,
                                      EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                                      v16i8x_info, i64mem, LdFrag, OpNode>,
                                      EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                                    v16i8x_info, i128mem, LdFrag, OpNode>,
                                    EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}

// byte -> qword extension (vpmov[sz]xbq).
multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
              SDNode OpNode, SDNode InVecNode, string ExtTy,
              X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                                      v16i8x_info, i16mem, LdFrag, InVecNode>,
                                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                                      v16i8x_info, i32mem, LdFrag, OpNode>,
                                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                                    v16i8x_info, i64mem, LdFrag, OpNode>,
                                    EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}

// word -> dword extension (vpmov[sz]xwd).
multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
              SDNode OpNode, SDNode InVecNode, string ExtTy,
              X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                                      v8i16x_info, i64mem, LdFrag, InVecNode>,
                                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                                      v8i16x_info, i128mem, LdFrag, OpNode>,
                                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                                    v16i16x_info, i256mem, LdFrag, OpNode>,
                                    EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}

// word -> qword extension (vpmov[sz]xwq).
multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
              SDNode OpNode, SDNode InVecNode, string ExtTy,
              X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                                      v8i16x_info, i32mem, LdFrag, InVecNode>,
                                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                                      v8i16x_info, i64mem, LdFrag, OpNode>,
                                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                                    v8i16x_info, i128mem, LdFrag, OpNode>,
                                    EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}

// dword -> qword extension (vpmov[sz]xdq).
multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
              SDNode OpNode, SDNode InVecNode, string ExtTy,
              X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                                      v4i32x_info, i64mem, LdFrag, InVecNode>,
                                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                                      v4i32x_info, i128mem, LdFrag, OpNode>,
                                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                                    v8i32x_info, i256mem, LdFrag, OpNode>,
                                    EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}

defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", WriteShuffle256>;

defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", WriteShuffle256>;


// Load-folding patterns for the VPMOVSX/VPMOVZX families: fold the various
// ways the DAG can express a narrow load (scalar_to_vector, vzmovl/vzload,
// bitcast of wide load) into the memory forms of the extend instructions.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasBWI] in {
  def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
  def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;

  def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}

defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec>;

//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

// FIXME: Improve scheduling of gather/scatter instructions.
// One gather instruction. $mask is both input (lanes to load) and output
// ($mask_wb, lanes cleared as they complete); $dst merges into $src1.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, PatFrag GatherNode,
                         RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain in
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
                    (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
                    !strconcat(OpcodeStr#_.Suffix,
                               "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
                    [(set _.RC:$dst, MaskRC:$mask_wb,
                          (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
                                      vectoraddr:$src2))]>, EVEX, EVEX_K,
                    EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}

// Gathers of 64-bit elements (pd/q forms), dword- and qword-indexed.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                       vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
                                       vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                          vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
                                          vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
}
}

// Gathers of 32-bit elements (ps/d forms). Note the qword-indexed variants
// produce a half-width result vector (e.g. qps Z: 512-bit indices, 256-bit data).
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mgatherv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                       mgatherv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                          vy256xmem, mgatherv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vy128xmem, mgatherv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mgatherv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vx64xmem, mgatherv2i64, VK2WM>,
                                          EVEX_V128;
}
}


defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;

// One scatter instruction; $mask is written back like gather's ($mask_wb).
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag ScatterNode,
                          RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in

  def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
                    (ins memop:$dst, MaskRC:$mask, _.RC:$src),
                    !strconcat(OpcodeStr#_.Suffix,
                               "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                    [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
                                            MaskRC:$mask, vectoraddr:$dst))]>,
                    EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[WriteStore]>;
}

// Scatters of 64-bit elements (pd/q forms); mirrors avx512_gather_q_pd.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
                                        vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
                                        vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                           vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
                                           vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                           vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                           vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
}
}

// Scatters of 32-bit elements (ps/d forms); mirrors avx512_gather_d_ps.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                        mscatterv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                        mscatterv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                           vy256xmem, mscatterv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                           vy128xmem, mscatterv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                           vx128xmem, mscatterv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                           vx64xmem, mscatterv2i64, VK2WM>,
                                           EVEX_V128;
}
}

defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;

// prefetch
// Gather/scatter prefetch instructions (AVX-512 PF); no ISel pattern —
// these are reachable via intrinsics/assembly only.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                                          RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
                   !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
                   EVEX, EVEX_K, Sched<[WriteLoad]>;
}

defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                    VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                    VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                    VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                    VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// VPMOVM2* register form for one vector width: sign-extend each mask bit
// into a full vector element (-1 / 0).
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                    !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                    [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                    EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
}

// VPMOVM2* at all three vector widths (512 always; 128/256 under VLX).
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}

defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;

// VPMOV*2M: set each mask bit from the sign bit of the corresponding vector
// element, matched here as (0 > x) signed compare.
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                        EVEX, Sched<[WriteMove]>;
}

// Use 512bit version to implement 128/256 bit in case NoVLX.
// NoVLX fallback: widen the 128/256-bit source into an undef 512-bit register
// with INSERT_SUBREG, run the 512-bit vpmov*2m, then move the (wider) result
// into the narrow mask register class. The extra mask bits correspond to
// undef lanes and are dropped by the COPY_TO_REGCLASS.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}

// Instantiate vpmov*2m at 512-bit (under prd), at 128/256-bit with real
// instructions when VLX is present, and via the 512-bit lowering patterns
// above when it is not (the _Alt defms emit only patterns, no instructions).
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                              EVEX_V128;
  }
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}

// Byte/word forms require BWI; dword/qword forms require DQI.
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;

// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
// Extend the mask to v16i32 with the DQI vpmovm2d, then truncate down to
// bytes/words with vpmovdb/vpmovdw (both available without BWI).
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//

// Register form selects on X86compress; the store forms (mr/mrk) have no
// patterns here — the masked compressing store is matched by
// compress_by_vec_width_lowering below.
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86compress _.RC:$src1))>, AVX5128IBase,
              Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;
}

// Match a masked compressing store to the mrk form defined above.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
                                              (_.VT _.RC:$src)),
            (!cast<Instruction>(Name#_.ZSuffix##mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
}

// 512-bit under Pred; 128/256-bit additionally require VLX.
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPCOMPRESS?
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;

// expand
// Register and load forms select on X86expand; masked expanding loads are
// matched by expand_by_vec_width_lowering below.
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand _.RC:$src1))>, AVX5128IBase,
              Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand (_.VT (bitconvert
                                      (_.LdFrag addr:$src1)))))>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, ReadAfterLd]>;
}

// Masked expanding loads: undef and zero passthru map to the zeroing form
// (rmkz); a register passthru maps to the merging form (rmk).
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                               (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk)
                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
}

// 512-bit under Pred; 128/256-bit additionally require VLX.
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPEXPAND?
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
                                      avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
                                      avx512vl_f64_info>, EVEX, VEX_W;

//handle instruction reg_vec1 = op(reg_vec,imm)
//                              op(mem_vec,imm)
//                              op(broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2))>, Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 imm:$src2))>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  // Embedded-broadcast form: the scalar memory operand is splatted before the op.
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
                    "${src1}"##_.BroadcastStr##", $src2",
                    (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
                            (i32 imm:$src2))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}

//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// SAE (suppress-all-exceptions) variant: register-only, EVEX_B selects the
// {sae} encoding, and the node carries FROUND_NO_EXC.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}

// 512-bit gets both the plain and the SAE forms; 128/256-bit (VLX) only the
// plain form, since {sae} is encodable only at 512-bit.
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
                                           _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
                                               sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
                                           _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
                                           _.info256>, EVEX_V256;
  }
}

//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                              op(reg_vec2,mem_vec,imm)
//                              op(reg_vec2,broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT _.RC:$src2),
                          (i32 imm:$src3))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT (bitconvert (_.LdFrag addr:$src2))),
                          (i32 imm:$src3))>,
                  Sched<[sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                  OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr##", $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                          (i32 imm:$src3))>, EVEX_B,
                  Sched<[sched.Folded, ReadAfterLd]>;
  }
}

//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                              op(reg_vec2,mem_vec,imm)
// Destination and source may use different VT infos (DestInfo vs SrcInfo).
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                    X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                               (SrcInfo.VT SrcInfo.RC:$src2),
                               (i8 imm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                             (SrcInfo.VT (bitconvert
                                            (SrcInfo.LdFrag addr:$src2))),
                             (i8 imm:$src3)))>,
                Sched<[sched.Folded, ReadAfterLd]>;
  }
}

//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                              op(reg_vec2,mem_vec,imm)
//                              op(reg_vec2,broadcast(eltVt),imm)
// Inherits rri/rmi from avx512_3Op_rm_imm8 and adds the broadcast form.
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                "$src1, ${src2}"##_.BroadcastStr##", $src3",
                (OpNode (_.VT _.RC:$src1),
                        (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                        (i8 imm:$src3))>, EVEX_B,
                Sched<[sched.Folded, ReadAfterLd]>;
}

//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                                     op(reg_vec2,mem_scalar,imm)
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT _.RC:$src2),
                          (i32 imm:$src3))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT (scalar_to_vector
                                    (_.ScalarLdFrag addr:$src2))),
                          (i32 imm:$src3))>,
                  Sched<[sched.Folded, ReadAfterLd]>;
  }
}

//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}

//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}

// 512-bit gets plain + SAE forms; 128/256-bit (VLX) only the plain form.
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched.ZMM, _.info512>,
                EVEX_V512;

  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256;
  }
}
// VL wrapper for the dual-VT-info 3-operand imm8 multiclass (BWI by default).
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                    X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                    AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}

// VL wrapper for the single-VT-info 3-operand imm8 multiclass.
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                    bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                    Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                             EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                EVEX_V256;
  }
}

// Scalar imm ops: plain + SAE forms under a single predicate (no VL split
// for scalars).
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
             avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched.XMM, _>;
  }
}

// Instantiate PS (f32) and PD (f64) flavors; PD carries VEX_W and may use a
// different opcode (opcPd vs opcPs).
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
                    SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, OpNodeRnd, sched, prd>,
                            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, OpNodeRnd, sched, prd>,
                            EVEX_CD8<64, CD8VF>, VEX_W;
}

defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduceRnd, SchedWriteFRnd, HasDQI>,
                              AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86VRndScale, X86VRndScaleRnd, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMantRnd, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;

defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;


// Lower generic FP rounding nodes to VRNDSCALE with a fixed immediate:
// ffloor -> 0x9, fnearbyint -> 0xC, fceil -> 0xA, frint -> 0x4,
// ftrunc -> 0xB. The same mapping is repeated for every operand/masking
// combination: register, merge-masked, zero-masked, load, and broadcast load.
multiclass AVX512_rndscale_lowering<X86VectorVTInfo _, string Suffix> {
  // Register
  def : Pat<(_.VT (ffloor _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0x9))>;
  def : Pat<(_.VT (fnearbyint _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0xC))>;
  def : Pat<(_.VT (fceil _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0xA))>;
  def : Pat<(_.VT (frint _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0x4))>;
  def : Pat<(_.VT (ftrunc _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0xB))>;

  // Merge-masking
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;

  // Zero-masking
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;

  // Load
  def : Pat<(_.VT (ffloor (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (fnearbyint (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (fceil (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (frint (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (ftrunc (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0xB))>;

  // Merge-masking + load
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;

  // Zero-masking + load
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0xB))>;

  // Broadcast load
  def : Pat<(_.VT (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (frint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0xB))>;

  // Merge-masking + broadcast load
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;

  // Zero-masking + broadcast load
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0xB))>;
}

let Predicates = [HasAVX512] in {
  defm : AVX512_rndscale_lowering<v16f32_info, "PS">;
  defm : AVX512_rndscale_lowering<v8f64_info, "PD">;
}

let Predicates = [HasVLX] in {
  defm : AVX512_rndscale_lowering<v8f32x_info, "PS">;
  defm : AVX512_rndscale_lowering<v4f64x_info, "PD">;
  defm : AVX512_rndscale_lowering<v4f32x_info, "PS">;
  defm : AVX512_rndscale_lowering<v2f64x_info, "PD">;
}

// 128-bit-lane shuffles (vshuff/vshufi 32x4/64x2). The shuffle is matched on
// CastInfo's VT and bitconverted to the instruction's VT; EVEX2VEXOvrd names
// the VEX instruction the EVEX->VEX pass may rewrite the 256-bit form into.
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                  (i8 imm:$src3)))))>,
                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                           (bitconvert (_.LdFrag addr:$src2)),
                                           (i8 imm:$src3)))))>,
                Sched<[sched.Folded, ReadAfterLd]>,
                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (_.VT
                     (bitconvert
                      (CastInfo.VT
                       (X86Shuf128 _.RC:$src1,
                                   (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                                   (i8 imm:$src3)))))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}

// 512-bit form has no VEX counterpart, so its override string is empty;
// the 256-bit form passes EVEX2VEXOvrd through.
multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
                                   string EVEX2VEXOvrd>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512, "">, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256,
                                             EVEX2VEXOvrd>, EVEX_V256;
}

defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;

let Predicates = [HasAVX512] in {
// Provide fallback in case the load node that is used in the broadcast
// patterns above is used by additional users, which prevents the pattern
// selection.
10410def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))), 10411 (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10412 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10413 0)>; 10414def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))), 10415 (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10416 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10417 0)>; 10418 10419def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))), 10420 (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10421 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10422 0)>; 10423def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))), 10424 (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10425 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10426 0)>; 10427 10428def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))), 10429 (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10430 (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10431 0)>; 10432 10433def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))), 10434 (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10435 (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10436 0)>; 10437} 10438 10439multiclass avx512_valign<bits<8> opc, string OpcodeStr, 10440 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 10441 // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the 10442 // instantiation of this class. 
10443 let ExeDomain = _.ExeDomain in { 10444 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10445 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), 10446 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10447 (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>, 10448 Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">; 10449 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10450 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), 10451 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10452 (_.VT (X86VAlign _.RC:$src1, 10453 (bitconvert (_.LdFrag addr:$src2)), 10454 (i8 imm:$src3)))>, 10455 Sched<[sched.Folded, ReadAfterLd]>, 10456 EVEX2VEXOverride<"VPALIGNRrmi">; 10457 10458 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10459 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 10460 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", 10461 "$src1, ${src2}"##_.BroadcastStr##", $src3", 10462 (X86VAlign _.RC:$src1, 10463 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), 10464 (i8 imm:$src3))>, EVEX_B, 10465 Sched<[sched.Folded, ReadAfterLd]>; 10466 } 10467} 10468 10469multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched, 10470 AVX512VLVectorVTInfo _> { 10471 let Predicates = [HasAVX512] in { 10472 defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>, 10473 AVX512AIi8Base, EVEX_4V, EVEX_V512; 10474 } 10475 let Predicates = [HasAVX512, HasVLX] in { 10476 defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>, 10477 AVX512AIi8Base, EVEX_4V, EVEX_V128; 10478 // We can't really override the 256-bit version so change it back to unset. 10479 let EVEX2VEXOverride = ? 
in 10480 defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>, 10481 AVX512AIi8Base, EVEX_4V, EVEX_V256; 10482 } 10483} 10484 10485defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle, 10486 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 10487defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle, 10488 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, 10489 VEX_W; 10490 10491defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", 10492 SchedWriteShuffle, avx512vl_i8_info, 10493 avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; 10494 10495// Fragments to help convert valignq into masked valignd. Or valignq/valignd 10496// into vpalignr. 10497def ValignqImm32XForm : SDNodeXForm<imm, [{ 10498 return getI8Imm(N->getZExtValue() * 2, SDLoc(N)); 10499}]>; 10500def ValignqImm8XForm : SDNodeXForm<imm, [{ 10501 return getI8Imm(N->getZExtValue() * 8, SDLoc(N)); 10502}]>; 10503def ValigndImm8XForm : SDNodeXForm<imm, [{ 10504 return getI8Imm(N->getZExtValue() * 4, SDLoc(N)); 10505}]>; 10506 10507multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode, 10508 X86VectorVTInfo From, X86VectorVTInfo To, 10509 SDNodeXForm ImmXForm> { 10510 def : Pat<(To.VT (vselect To.KRCWM:$mask, 10511 (bitconvert 10512 (From.VT (OpNode From.RC:$src1, From.RC:$src2, 10513 imm:$src3))), 10514 To.RC:$src0)), 10515 (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask, 10516 To.RC:$src1, To.RC:$src2, 10517 (ImmXForm imm:$src3))>; 10518 10519 def : Pat<(To.VT (vselect To.KRCWM:$mask, 10520 (bitconvert 10521 (From.VT (OpNode From.RC:$src1, From.RC:$src2, 10522 imm:$src3))), 10523 To.ImmAllZerosV)), 10524 (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask, 10525 To.RC:$src1, To.RC:$src2, 10526 (ImmXForm imm:$src3))>; 10527 10528 def : Pat<(To.VT (vselect To.KRCWM:$mask, 10529 (bitconvert 10530 (From.VT (OpNode From.RC:$src1, 10531 (bitconvert (To.LdFrag addr:$src2)), 10532 imm:$src3))), 10533 To.RC:$src0)), 10534 (!cast<Instruction>(OpcodeStr#"rmik") 
To.RC:$src0, To.KRCWM:$mask, 10535 To.RC:$src1, addr:$src2, 10536 (ImmXForm imm:$src3))>; 10537 10538 def : Pat<(To.VT (vselect To.KRCWM:$mask, 10539 (bitconvert 10540 (From.VT (OpNode From.RC:$src1, 10541 (bitconvert (To.LdFrag addr:$src2)), 10542 imm:$src3))), 10543 To.ImmAllZerosV)), 10544 (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask, 10545 To.RC:$src1, addr:$src2, 10546 (ImmXForm imm:$src3))>; 10547} 10548 10549multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode, 10550 X86VectorVTInfo From, 10551 X86VectorVTInfo To, 10552 SDNodeXForm ImmXForm> : 10553 avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> { 10554 def : Pat<(From.VT (OpNode From.RC:$src1, 10555 (bitconvert (To.VT (X86VBroadcast 10556 (To.ScalarLdFrag addr:$src2)))), 10557 imm:$src3)), 10558 (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2, 10559 (ImmXForm imm:$src3))>; 10560 10561 def : Pat<(To.VT (vselect To.KRCWM:$mask, 10562 (bitconvert 10563 (From.VT (OpNode From.RC:$src1, 10564 (bitconvert 10565 (To.VT (X86VBroadcast 10566 (To.ScalarLdFrag addr:$src2)))), 10567 imm:$src3))), 10568 To.RC:$src0)), 10569 (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask, 10570 To.RC:$src1, addr:$src2, 10571 (ImmXForm imm:$src3))>; 10572 10573 def : Pat<(To.VT (vselect To.KRCWM:$mask, 10574 (bitconvert 10575 (From.VT (OpNode From.RC:$src1, 10576 (bitconvert 10577 (To.VT (X86VBroadcast 10578 (To.ScalarLdFrag addr:$src2)))), 10579 imm:$src3))), 10580 To.ImmAllZerosV)), 10581 (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask, 10582 To.RC:$src1, addr:$src2, 10583 (ImmXForm imm:$src3))>; 10584} 10585 10586let Predicates = [HasAVX512] in { 10587 // For 512-bit we lower to the widest element type we can. So we only need 10588 // to handle converting valignq to valignd. 
10589 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info, 10590 v16i32_info, ValignqImm32XForm>; 10591} 10592 10593let Predicates = [HasVLX] in { 10594 // For 128-bit we lower to the widest element type we can. So we only need 10595 // to handle converting valignq to valignd. 10596 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info, 10597 v4i32x_info, ValignqImm32XForm>; 10598 // For 256-bit we lower to the widest element type we can. So we only need 10599 // to handle converting valignq to valignd. 10600 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info, 10601 v8i32x_info, ValignqImm32XForm>; 10602} 10603 10604let Predicates = [HasVLX, HasBWI] in { 10605 // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR. 10606 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info, 10607 v16i8x_info, ValignqImm8XForm>; 10608 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info, 10609 v16i8x_info, ValigndImm8XForm>; 10610} 10611 10612defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw", 10613 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>, 10614 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible; 10615 10616multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10617 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 10618 let ExeDomain = _.ExeDomain in { 10619 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10620 (ins _.RC:$src1), OpcodeStr, 10621 "$src1", "$src1", 10622 (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase, 10623 Sched<[sched]>; 10624 10625 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10626 (ins _.MemOp:$src1), OpcodeStr, 10627 "$src1", "$src1", 10628 (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>, 10629 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>, 10630 Sched<[sched.Folded]>; 10631 } 10632} 10633 10634multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode 
OpNode, 10635 X86FoldableSchedWrite sched, X86VectorVTInfo _> : 10636 avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> { 10637 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10638 (ins _.ScalarMemOp:$src1), OpcodeStr, 10639 "${src1}"##_.BroadcastStr, 10640 "${src1}"##_.BroadcastStr, 10641 (_.VT (OpNode (X86VBroadcast 10642 (_.ScalarLdFrag addr:$src1))))>, 10643 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 10644 Sched<[sched.Folded]>; 10645} 10646 10647multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, 10648 X86SchedWriteWidths sched, 10649 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 10650 let Predicates = [prd] in 10651 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>, 10652 EVEX_V512; 10653 10654 let Predicates = [prd, HasVLX] in { 10655 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>, 10656 EVEX_V256; 10657 defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>, 10658 EVEX_V128; 10659 } 10660} 10661 10662multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, 10663 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, 10664 Predicate prd> { 10665 let Predicates = [prd] in 10666 defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>, 10667 EVEX_V512; 10668 10669 let Predicates = [prd, HasVLX] in { 10670 defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>, 10671 EVEX_V256; 10672 defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>, 10673 EVEX_V128; 10674 } 10675} 10676 10677multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr, 10678 SDNode OpNode, X86SchedWriteWidths sched, 10679 Predicate prd> { 10680 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched, 10681 avx512vl_i64_info, prd>, VEX_W; 10682 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched, 10683 avx512vl_i32_info, prd>; 
10684} 10685 10686multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr, 10687 SDNode OpNode, X86SchedWriteWidths sched, 10688 Predicate prd> { 10689 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched, 10690 avx512vl_i16_info, prd>, VEX_WIG; 10691 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched, 10692 avx512vl_i8_info, prd>, VEX_WIG; 10693} 10694 10695multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w, 10696 bits<8> opc_d, bits<8> opc_q, 10697 string OpcodeStr, SDNode OpNode, 10698 X86SchedWriteWidths sched> { 10699 defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched, 10700 HasAVX512>, 10701 avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched, 10702 HasBWI>; 10703} 10704 10705defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, 10706 SchedWriteVecALU>; 10707 10708// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX. 10709let Predicates = [HasAVX512, NoVLX] in { 10710 def : Pat<(v4i64 (abs VR256X:$src)), 10711 (EXTRACT_SUBREG 10712 (VPABSQZrr 10713 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)), 10714 sub_ymm)>; 10715 def : Pat<(v2i64 (abs VR128X:$src)), 10716 (EXTRACT_SUBREG 10717 (VPABSQZrr 10718 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)), 10719 sub_xmm)>; 10720} 10721 10722// Use 512bit version to implement 128/256 bit. 
// Lower a 256/128-bit unary op to the 512-bit instruction named InstrStr when
// VLX is unavailable: widen the source with INSERT_SUBREG, run the Z-form,
// and extract the original-width subregister back out.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
  def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
            (EXTRACT_SUBREG
             (!cast<Instruction>(InstrStr # "Zrr")
              (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                             _.info256.RC:$src1,
                             _.info256.SubRegIdx)),
             _.info256.SubRegIdx)>;

  def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
            (EXTRACT_SUBREG
             (!cast<Instruction>(InstrStr # "Zrr")
              (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                             _.info128.RC:$src1,
                             _.info128.SubRegIdx)),
             _.info128.SubRegIdx)>;
  }
}

defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                     SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

// VPOPCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;

//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

// f32 duplicate-odd/duplicate-even lanes at all widths (unary, maskable).
multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                avx512vl_f32_info, HasAVX512>, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;

//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

// 128-bit VMOVDDUP is special: its memory form loads a single f64 (hence
// ScalarMemOp and the CD8VH half-vector tuple) and the caller passes the
// broadcast node as OpNode.
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (OpNode (_.VT (scalar_to_vector
                                       (_.ScalarLdFrag addr:$src)))))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}

// 512/256-bit forms use the generic unary multiclass with X86Movddup; the
// 128-bit form uses avx512_movddup_128 with X86VBroadcast instead.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched> {
  defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
                                   avx512vl_f64_info>, XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;

// Fold v2f64 f64 broadcasts (register, scalar-load and vector-load sources)
// into 128-bit VMOVDDUP, including merge- and zero-masked forms.
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
          (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
          (VMOVDDUPZ128rm addr:$src)>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;

defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;

//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//

// Store-to-memory form of byte/word element extract; the extracted GPR value
// is truncated back to the element type before the store.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
                     (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                     OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
                             addr:$dst)]>,
                     EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}

// VPEXTRB: register and memory destinations (BWI).
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}

// VPEXTRW: register/memory destinations plus the alternate 0x15 encoding
// (rr_REV), kept disassemble-only via FoldGenData.
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                           (ins _.RC:$src1, u8imm:$src2),
                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                           EVEX, TAPD, FoldGenData<NAME#rr>,
                           Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}

// VPEXTRD/VPEXTRQ: dword/qword extract to GPR or memory (DQI).
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GRC:$dst,
                           (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                       (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(store (extractelt (_.VT _.RC:$src1),
                                           imm:$src2),addr:$dst)]>,
                       EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                       Sched<[WriteVecExtractSt]>;
  }
}

defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;

// Memory-source form of element insert.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
}

// VPINSRB/VPINSRW: GPR and memory source (BWI).
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}

// VPINSRD/VPINSRQ: GPR and memory source (DQI); matched via generic insertelt.
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>, TAPD;
  }
}

defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;

//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//

multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
                                    AVX512AIi8Base, EVEX_4V;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//

// FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
// Whole-register byte shift (VPSLLDQ/VPSRLDQ): unmasked, immediate byte
// count; opcode-extension forms are passed in via the MRMr/MRMm formats.
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def rr : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                 (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                 (i8 imm:$src2))))]>,
           Sched<[sched.Folded, ReadAfterLd]>;
}

// Instantiates the byte shifts at 512 bits plus 256/128 under VLX.
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;

// VPSADBW: byte sources, quadword accumulator destination, so the source and
// destination VT infos differ; unmasked.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
           (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _dst.RC:$dst,(_dst.VT
                               (OpNode (_src.VT _src.RC:$src1),
                                       (_src.VT (bitconvert
                                                 (_src.LdFrag addr:$src2))))))]>,
            Sched<[sched.Folded, ReadAfterLd]>;
}

multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;

// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
// Operands are numbered 0-2 below.  Bit i of the VPTERNLOG immediate is the
// result for the input combination where bits 2/1/0 of i are the values of
// operands 0/1/2 respectively, so permuting operands permutes the imm bits.
def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by moving operand 0 to the end.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;

// VPTERNLOG: three-source ternary logic with the truth table in $src4.
// $src1 is tied to $dst; the extra patterns below use the *_imm8 transforms
// above to commute operands so loads/passthru land in the encodable slot.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 imm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (bitconvert (_.LdFrag addr:$src3))),
                              (i8 imm:$src4)), 1, 0>,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                            (i8 imm:$src4)), 1, 0>, EVEX_B,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;

  // Additional patterns for matching loads in other positions.
  def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
11227 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11228 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), 11229 _.RC:$src2, (i8 imm:$src4)), 11230 _.RC:$src1)), 11231 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11232 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; 11233 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11234 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11235 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), 11236 _.RC:$src1)), 11237 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11238 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; 11239 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11240 (OpNode _.RC:$src2, _.RC:$src1, 11241 (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)), 11242 _.RC:$src1)), 11243 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11244 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>; 11245 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11246 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)), 11247 _.RC:$src1, (i8 imm:$src4)), 11248 _.RC:$src1)), 11249 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11250 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>; 11251 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11252 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11253 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)), 11254 _.RC:$src1)), 11255 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11256 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; 11257 11258 // Additional patterns for matching broadcasts in other positions. 
11259 def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), 11260 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))), 11261 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2, 11262 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; 11263 def : Pat<(_.VT (OpNode _.RC:$src1, 11264 (X86VBroadcast (_.ScalarLdFrag addr:$src3)), 11265 _.RC:$src2, (i8 imm:$src4))), 11266 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2, 11267 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; 11268 11269 // Additional patterns for matching zero masking with broadcasts in other 11270 // positions. 11271 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11272 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), 11273 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), 11274 _.ImmAllZerosV)), 11275 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, 11276 _.KRCWM:$mask, _.RC:$src2, addr:$src3, 11277 (VPTERNLOG321_imm8 imm:$src4))>; 11278 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11279 (OpNode _.RC:$src1, 11280 (X86VBroadcast (_.ScalarLdFrag addr:$src3)), 11281 _.RC:$src2, (i8 imm:$src4)), 11282 _.ImmAllZerosV)), 11283 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, 11284 _.KRCWM:$mask, _.RC:$src2, addr:$src3, 11285 (VPTERNLOG132_imm8 imm:$src4))>; 11286 11287 // Additional patterns for matching masked broadcasts with different 11288 // operand orders. 
11289 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11290 (OpNode _.RC:$src1, 11291 (X86VBroadcast (_.ScalarLdFrag addr:$src3)), 11292 _.RC:$src2, (i8 imm:$src4)), 11293 _.RC:$src1)), 11294 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11295 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; 11296 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11297 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), 11298 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), 11299 _.RC:$src1)), 11300 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11301 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; 11302 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11303 (OpNode _.RC:$src2, _.RC:$src1, 11304 (X86VBroadcast (_.ScalarLdFrag addr:$src3)), 11305 (i8 imm:$src4)), _.RC:$src1)), 11306 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11307 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>; 11308 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11309 (OpNode _.RC:$src2, 11310 (X86VBroadcast (_.ScalarLdFrag addr:$src3)), 11311 _.RC:$src1, (i8 imm:$src4)), 11312 _.RC:$src1)), 11313 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11314 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>; 11315 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11316 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), 11317 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)), 11318 _.RC:$src1)), 11319 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11320 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; 11321} 11322 11323multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched, 11324 AVX512VLVectorVTInfo _> { 11325 let Predicates = [HasAVX512] in 11326 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM, 11327 _.info512, NAME>, EVEX_V512; 11328 let Predicates = [HasAVX512, HasVLX] in { 11329 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM, 11330 _.info128, NAME>, EVEX_V128; 11331 
defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM, 11332 _.info256, NAME>, EVEX_V256; 11333 } 11334} 11335 11336defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU, 11337 avx512vl_i32_info>; 11338defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU, 11339 avx512vl_i64_info>, VEX_W; 11340 11341// Patterns to implement vnot using vpternlog instead of creating all ones 11342// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen 11343// so that the result is only dependent on src0. But we use the same source 11344// for all operands to prevent a false dependency. 11345// TODO: We should maybe have a more generalized algorithm for folding to 11346// vpternlog. 11347let Predicates = [HasAVX512] in { 11348 def : Pat<(v8i64 (xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV)))), 11349 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 11350} 11351 11352let Predicates = [HasAVX512, NoVLX] in { 11353 def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))), 11354 (EXTRACT_SUBREG 11355 (VPTERNLOGQZrri 11356 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11357 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11358 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11359 (i8 15)), sub_xmm)>; 11360 def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))), 11361 (EXTRACT_SUBREG 11362 (VPTERNLOGQZrri 11363 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11364 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11365 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11366 (i8 15)), sub_ymm)>; 11367} 11368 11369let Predicates = [HasVLX] in { 11370 def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))), 11371 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 11372 def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))), 11373 (VPTERNLOGQZ256rri VR256X:$src, 
VR256X:$src, VR256X:$src, (i8 15))>; 11374} 11375 11376//===----------------------------------------------------------------------===// 11377// AVX-512 - FixupImm 11378//===----------------------------------------------------------------------===// 11379 11380multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, 11381 X86FoldableSchedWrite sched, X86VectorVTInfo _, 11382 X86VectorVTInfo TblVT>{ 11383 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { 11384 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 11385 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11386 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11387 (OpNode (_.VT _.RC:$src1), 11388 (_.VT _.RC:$src2), 11389 (TblVT.VT _.RC:$src3), 11390 (i32 imm:$src4), 11391 (i32 FROUND_CURRENT))>, Sched<[sched]>; 11392 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 11393 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4), 11394 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11395 (OpNode (_.VT _.RC:$src1), 11396 (_.VT _.RC:$src2), 11397 (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))), 11398 (i32 imm:$src4), 11399 (i32 FROUND_CURRENT))>, 11400 Sched<[sched.Folded, ReadAfterLd]>; 11401 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 11402 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), 11403 OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2", 11404 "$src2, ${src3}"##_.BroadcastStr##", $src4", 11405 (OpNode (_.VT _.RC:$src1), 11406 (_.VT _.RC:$src2), 11407 (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))), 11408 (i32 imm:$src4), 11409 (i32 FROUND_CURRENT))>, 11410 EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; 11411 } // Constraints = "$src1 = $dst" 11412} 11413 11414multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr, 11415 SDNode OpNode, X86FoldableSchedWrite sched, 11416 X86VectorVTInfo _, X86VectorVTInfo TblVT>{ 11417let Constraints = 
"$src1 = $dst", ExeDomain = _.ExeDomain in { 11418 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 11419 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11420 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2", 11421 "$src2, $src3, {sae}, $src4", 11422 (OpNode (_.VT _.RC:$src1), 11423 (_.VT _.RC:$src2), 11424 (TblVT.VT _.RC:$src3), 11425 (i32 imm:$src4), 11426 (i32 FROUND_NO_EXC))>, 11427 EVEX_B, Sched<[sched]>; 11428 } 11429} 11430 11431multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, 11432 X86FoldableSchedWrite sched, X86VectorVTInfo _, 11433 X86VectorVTInfo _src3VT> { 11434 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512], 11435 ExeDomain = _.ExeDomain in { 11436 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 11437 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11438 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11439 (OpNode (_.VT _.RC:$src1), 11440 (_.VT _.RC:$src2), 11441 (_src3VT.VT _src3VT.RC:$src3), 11442 (i32 imm:$src4), 11443 (i32 FROUND_CURRENT))>, Sched<[sched]>; 11444 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 11445 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11446 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2", 11447 "$src2, $src3, {sae}, $src4", 11448 (OpNode (_.VT _.RC:$src1), 11449 (_.VT _.RC:$src2), 11450 (_src3VT.VT _src3VT.RC:$src3), 11451 (i32 imm:$src4), 11452 (i32 FROUND_NO_EXC))>, 11453 EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; 11454 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 11455 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), 11456 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11457 (OpNode (_.VT _.RC:$src1), 11458 (_.VT _.RC:$src2), 11459 (_src3VT.VT (scalar_to_vector 11460 (_src3VT.ScalarLdFrag addr:$src3))), 11461 (i32 imm:$src4), 11462 (i32 FROUND_CURRENT))>, 11463 Sched<[sched.Folded, ReadAfterLd]>; 11464 } 11465} 11466 
// Instantiates packed VFIXUPIMM at all widths; the 512-bit form also gets the
// {sae} variant. 128/256-bit forms require VLX.
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
  defm Z    : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
                                _Vec.info512, _Tbl.info512>,
              avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
              EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.XMM,
                                _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.YMM,
                                _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                EVEX_4V, EVEX_V256;
  }
}

defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                                              avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                                              avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;

// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
// Selects the AVX-512 scalar fp arithmetic instruction (and its masked /
// zero-masked variants) for "extract element 0, op, re-insert via movss/movsd"
// DAGs, avoiding a separate blend.
multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;

    // extracted zero-masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2), (_.EltVT ZeroFP)))),
              (!cast<I>("V"#OpcPrefix#Zrr_Intkz)
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
  }
}

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;

// Same idea for unary scalar math (sqrt): fold the extract/op/insert sequence
// into the intrinsic-form instruction.
multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
  }
}

defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;

// Unary scalar math that takes an extra rounding-mode immediate (rndscale):
// ImmV selects the rounding behavior (0x01 = floor, 0x02 = ceil below).
multiclass AVX512_scalar_unary_math_imm_patterns<SDNode OpNode, string OpcPrefix,
                                                 SDNode Move, X86VectorVTInfo _,
                                                 bits<8> ImmV> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src,
               (i32 ImmV))>;
  }
}

defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESS", X86Movss,
                                             v4f32x_info, 0x01>;
defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESS", X86Movss,
                                             v4f32x_info, 0x02>;
defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESD", X86Movsd,
                                             v2f64x_info, 0x01>;
defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESD", X86Movsd,
                                             v2f64x_info, 0x02>;

//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//

// EVEX-encoded VAES at all widths. IntPrefix names the 128-bit intrinsic;
// the 256/512-bit intrinsics are derived by suffix.
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in
  defm Z : AESI_binop_rm_int<Op, OpStr,
                             !cast<Intrinsic>(IntPrefix##"_512"),
                             loadv8i64, 0, VR512, i512mem>,
           EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}

defm VAESENC     : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC     : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;

//===----------------------------------------------------------------------===//
// PCLMUL instructions - Carry less multiplication
//===----------------------------------------------------------------------===//

let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                   EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                      EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256 : vpclmulqdq<VR256X, i256mem, loadv4i64,
                                 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                      EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;

//===----------------------------------------------------------------------===//
// VBMI2
//===----------------------------------------------------------------------===//

// VBMI2 variable funnel shifts: register and folded-load forms.
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in {
    defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                AVX512FMA3Base, Sched<[sched]>;
    defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                         (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>,
                AVX512FMA3Base,
                Sched<[sched.Folded, ReadAfterLd]>;
  }
}

// Adds the embedded-broadcast form (only valid for 32/64-bit elements).
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
    : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in
  defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
              "${src3}"##VTI.BroadcastStr##", $src2",
              "$src2, ${src3}"##VTI.BroadcastStr,
              (OpNode VTI.RC:$src1, VTI.RC:$src2,
               (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
              AVX512FMA3Base, EVEX_B,
              Sched<[sched.Folded, ReadAfterLd]>;
}

multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
           EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}

multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
           EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}
// Word forms have no broadcast; dword/qword forms do.
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
                                     avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
                                      avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
                                      avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
                                     avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
           VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
           OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
           sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}

// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;

// Compress
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width<0x63, "vpcompressw", WriteVarShuffle256,
                                         avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                         NotMemoryFoldable;
// Expand
defm VPEXPANDB : expand_by_elt_width<0x62, "vpexpandb", WriteVarShuffle256,
                                     avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width<0x62, "vpexpandw", WriteVarShuffle256,
                                     avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;

//===----------------------------------------------------------------------===//
// VNNI
//===----------------------------------------------------------------------===//

// VNNI dot-product accumulate: register, load and broadcast forms.
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm r  : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                 "$src3, $src2", "$src2, $src3",
                                 (VTI.VT (OpNode VTI.RC:$src1,
                                          VTI.RC:$src2, VTI.RC:$src3))>,
                                 EVEX_4V, T8PD, Sched<[sched]>;
  defm m  : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                 "$src3, $src2", "$src2, $src3",
                                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                          (VTI.VT (bitconvert
                                                   (VTI.LdFrag addr:$src3)))))>,
                                 EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                                 Sched<[sched.Folded, ReadAfterLd]>;
  defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                 OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
                                 "$src2, ${src3}"##VTI.BroadcastStr,
                                 (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                  (VTI.VT (X86VBroadcast
                                           (VTI.ScalarLdFrag addr:$src3))))>,
                                 EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                                 T8PD, Sched<[sched.Folded, ReadAfterLd]>;
}

multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched> {
  let Predicates = [HasVNNI] in
  defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info>, EVEX_V256;
    defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPDP?
defm VPDPBUSD  : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul>;
defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul>;
defm VPDPWSSD  : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul>;
defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul>;

//===----------------------------------------------------------------------===//
// Bit Algorithms
//===----------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
11824defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU, 11825 avx512vl_i8_info, HasBITALG>; 11826defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU, 11827 avx512vl_i16_info, HasBITALG>, VEX_W; 11828 11829defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>; 11830defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>; 11831 11832multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { 11833 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst), 11834 (ins VTI.RC:$src1, VTI.RC:$src2), 11835 "vpshufbitqmb", 11836 "$src2, $src1", "$src1, $src2", 11837 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), 11838 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD, 11839 Sched<[sched]>; 11840 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst), 11841 (ins VTI.RC:$src1, VTI.MemOp:$src2), 11842 "vpshufbitqmb", 11843 "$src2, $src1", "$src1, $src2", 11844 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), 11845 (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>, 11846 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD, 11847 Sched<[sched.Folded, ReadAfterLd]>; 11848} 11849 11850multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 11851 let Predicates = [HasBITALG] in 11852 defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512; 11853 let Predicates = [HasBITALG, HasVLX] in { 11854 defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256; 11855 defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128; 11856 } 11857} 11858 11859// FIXME: Is there a better scheduler class for VPSHUFBITQMB? 
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;

//===----------------------------------------------------------------------===//
// GFNI
//===----------------------------------------------------------------------===//

// EVEX-encoded GF(2^8) multiply. All widths require BWI in addition to GFNI;
// the 512-bit form also needs AVX512, the narrower forms need VLX.
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
                EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                EVEX_V128;
  }
}

defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8PD;

// GF(2^8) affine transform with immediate: inherits the rr/rm/imm8 forms from
// avx512_3Op_rm_imm8 and adds the rmbi broadcast form, which splats a single
// qword (loadi64 + X86VBroadcast, EVEX_B) as the affine matrix operand.
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
                 (i8 imm:$src3))>, EVEX_B,
                 Sched<[sched.Folded, ReadAfterLd]>;
}

// Instantiate the affine forms at all three widths. The byte-element tuple
// (vNi8) is paired with the matching qword tuple (vNi64) for the broadcast.
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                           v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}

defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                         X86GF2P8affineqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;


//===----------------------------------------------------------------------===//
// AVX5124FMAPS
//===----------------------------------------------------------------------===//

// Assembler/disassembler-only definitions (empty pattern lists, no ISel):
// memory-source 4-iteration FMA instructions with the destination tied to
// $src1. hasSideEffects = 0 / mayLoad = 1 describe them to the scheduler.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst" in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}

//===----------------------------------------------------------------------===//
// AVX5124VNNIW
//===----------------------------------------------------------------------===//

// Assembler/disassembler-only definitions, as for AVX5124FMAPS above:
// empty patterns, memory source, destination tied to $src1.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;
}